www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net/mlx5: Fix use-after-free in self-healing flow
author Jack Morgenstein <jackm@dev.mellanox.co.il>
Sun, 5 Aug 2018 06:19:33 +0000 (09:19 +0300)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 26 Sep 2018 06:37:57 +0000 (08:37 +0200)
[ Upstream commit 76d5581c870454be5f1f1a106c57985902e7ea20 ]

When the mlx5 health mechanism detects a problem while the driver
is in the middle of init_one or remove_one, the driver needs to prevent
the health mechanism from scheduling future work; if future work
is scheduled, there is a problem with use-after-free: the system WQ
tries to run the work item (which has been freed) at the scheduled
future time.

Prevent this by disabling work item scheduling in the health mechanism
when the driver is in the middle of init_one() or remove_one().

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
include/linux/mlx5/driver.h

index db86e1506c8b67fa8849940c4adc958fb783ac92..61f284966a8c99e506f3c0d0f76acafa199a05ab 100644 (file)
@@ -333,9 +333,17 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
        add_timer(&health->timer);
 }
 
-void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
 {
        struct mlx5_core_health *health = &dev->priv.health;
+       unsigned long flags;
+
+       if (disable_health) {
+               spin_lock_irqsave(&health->wq_lock, flags);
+               set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+               set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+               spin_unlock_irqrestore(&health->wq_lock, flags);
+       }
 
        del_timer_sync(&health->timer);
 }
index 4ddd632d10f9958c34238329f14faed02a021243..68e48b188458807d609b9d7e35e28410b2603606 100644 (file)
@@ -1227,7 +1227,7 @@ err_cleanup_once:
                mlx5_cleanup_once(dev);
 
 err_stop_poll:
-       mlx5_stop_health_poll(dev);
+       mlx5_stop_health_poll(dev, boot);
        if (mlx5_cmd_teardown_hca(dev)) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
                goto out_err;
@@ -1286,7 +1286,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_free_irq_vectors(dev);
        if (cleanup)
                mlx5_cleanup_once(dev);
-       mlx5_stop_health_poll(dev);
+       mlx5_stop_health_poll(dev, cleanup);
        err = mlx5_cmd_teardown_hca(dev);
        if (err) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
@@ -1548,7 +1548,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
         * with the HCA, so the health polll is no longer needed.
         */
        mlx5_drain_health_wq(dev);
-       mlx5_stop_health_poll(dev);
+       mlx5_stop_health_poll(dev, false);
 
        ret = mlx5_cmd_force_teardown_hca(dev);
        if (ret) {
index f2f9e957bf1b5e15cef34f84683f57d944f9e8b2..c4d19e77fea8baa66662635192d7f6cd873fec4e 100644 (file)
@@ -950,7 +950,7 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
-void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);