mlx5_core_warn(dev, "handling bad device here\n");
        mlx5_handle_bad_state(dev);
  
 -      spin_lock(&health->wq_lock);
 +      spin_lock_irqsave(&health->wq_lock, flags);
-       if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+       if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
                schedule_delayed_work(&health->recover_work, recover_delay);
        else
                dev_err(&dev->pdev->dev,
  /*
   * mlx5_drain_health_wq() - raise the "drop new work" flags, then cancel the
   * health work items synchronously.
   *
   * Both MLX5_DROP_NEW_HEALTH_WORK and MLX5_DROP_NEW_RECOVERY_WORK are set in
   * health->flags while health->wq_lock is held. This change swaps the plain
   * spin_lock()/spin_unlock() pair for the irqsave/irqrestore variants (hence
   * the new local 'flags') and adds the RECOVERY flag.
   *
   * The flags are set before the cancel calls. The visible scheduling path for
   * recover_work tests MLX5_DROP_NEW_RECOVERY_WORK under the same lock before
   * calling schedule_delayed_work(), so that path will not queue it again.
   *
   * NOTE(review): the reader of MLX5_DROP_NEW_HEALTH_WORK is not visible in
   * this fragment - presumably the path that queues health->work; confirm.
   */
  void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
  {
        struct mlx5_core_health *health = &dev->priv.health;
 +      unsigned long flags;
  
 -      spin_lock(&health->wq_lock);
 +      spin_lock_irqsave(&health->wq_lock, flags);
        set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 -      spin_unlock(&health->wq_lock);
+       set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 +      spin_unlock_irqrestore(&health->wq_lock, flags);
        cancel_delayed_work_sync(&health->recover_work);
        cancel_work_sync(&health->work);
  }
 
  void mlx5_start_health_poll(struct mlx5_core_dev *dev);
  void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
  void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
  /*
   * NOTE(review): the two prototypes added below have no definition in this
   * fragment - confirm both are defined by the same change.
   */
 +void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
+ void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
  int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
                        struct mlx5_buf *buf, int node);
  int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);