enum {
        MLX5_NIC_IFC_FULL               = 0,
        MLX5_NIC_IFC_DISABLED           = 1,
-       MLX5_NIC_IFC_NO_DRAM_NIC        = 2
+       MLX5_NIC_IFC_NO_DRAM_NIC        = 2,
+       MLX5_NIC_IFC_INVALID            = 3
 };
 
 enum {
        MLX5_DROP_NEW_HEALTH_WORK,
 };
 
-static u8 get_nic_interface(struct mlx5_core_dev *dev)
+static u8 get_nic_state(struct mlx5_core_dev *dev)
 {
        return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
 }
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;
 
-       if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED)
+       if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
                return 1;
 
        if (ioread32be(&h->fw_ver) == 0xffffffff)
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
 {
-       u8 nic_interface = get_nic_interface(dev);
+       u8 nic_interface = get_nic_state(dev);
 
        switch (nic_interface) {
        case MLX5_NIC_IFC_FULL:
        mlx5_disable_device(dev);
 }
 
+static void health_recover(struct work_struct *work)
+{
+       struct mlx5_core_health *health;
+       struct delayed_work *dwork;
+       struct mlx5_core_dev *dev;
+       struct mlx5_priv *priv;
+       u8 nic_state;
+
+       dwork = container_of(work, struct delayed_work, work);
+       health = container_of(dwork, struct mlx5_core_health, recover_work);
+       priv = container_of(health, struct mlx5_priv, health);
+       dev = container_of(priv, struct mlx5_core_dev, priv);
+
+       nic_state = get_nic_state(dev);
+       if (nic_state == MLX5_NIC_IFC_INVALID) {
+               dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
+               return;
+       }
+
+       dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+       mlx5_recover_device(dev);
+}
+
+/* How much time to wait until health resetting the driver (in msecs) */
+#define MLX5_RECOVERY_DELAY_MSECS 60000
 static void health_care(struct work_struct *work)
 {
+       unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS);
        struct mlx5_core_health *health;
        struct mlx5_core_dev *dev;
        struct mlx5_priv *priv;
        dev = container_of(priv, struct mlx5_core_dev, priv);
        mlx5_core_warn(dev, "handling bad device here\n");
        mlx5_handle_bad_state(dev);
+
+       spin_lock(&health->wq_lock);
+       if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+               schedule_delayed_work(&health->recover_work, recover_delay);
+       else
+               dev_err(&dev->pdev->dev,
+                       "new health works are not permitted at this stage\n");
+       spin_unlock(&health->wq_lock);
 }
 
 static const char *hsynd_str(u8 synd)
        spin_lock(&health->wq_lock);
        set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
        spin_unlock(&health->wq_lock);
+       cancel_delayed_work_sync(&health->recover_work);
        cancel_work_sync(&health->work);
 }
 
                return -ENOMEM;
        spin_lock_init(&health->wq_lock);
        INIT_WORK(&health->work, health_care);
+       INIT_DELAYED_WORK(&health->recover_work, health_recover);
 
        return 0;
 }
 
        struct mlx5_priv *priv = &dev->priv;
 
        dev_info(&pdev->dev, "%s was called\n", __func__);
+
        mlx5_enter_error_state(dev);
        mlx5_unload_one(dev, priv, false);
        /* In case of kernel call save the pci state and drain health wq */
        return PCI_ERS_RESULT_RECOVERED;
 }
 
-void mlx5_disable_device(struct mlx5_core_dev *dev)
-{
-       mlx5_pci_err_detected(dev->pdev, 0);
-}
-
 static void mlx5_pci_resume(struct pci_dev *pdev)
 {
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 
 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
 
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+       mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+void mlx5_recover_device(struct mlx5_core_dev *dev)
+{
+       mlx5_pci_disable_device(dev);
+       if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
+               mlx5_pci_resume(dev->pdev);
+}
+
 static struct pci_driver mlx5_core_driver = {
        .name           = DRIVER_NAME,
        .id_table       = mlx5_core_pci_table,