www.infradead.org Git - users/dwmw2/linux.git/commitdiff
net/mlx5: Always drain health in shutdown callback
author Shay Drory <shayd@nvidia.com>
Tue, 30 Jul 2024 06:16:30 +0000 (09:16 +0300)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 1 Aug 2024 01:04:50 +0000 (18:04 -0700)
There is no point in recovery during device shutdown. If health
work has already started, we need to wait for it to finish in order
to avoid races and NULL pointer access.

Hence, drain the health WQ in the shutdown callback.

Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver")
Fixes: d2aa060d40fa ("net/mlx5: Cancel health poll before sending panic teardown command")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Link: https://patch.msgid.link/20240730061638.1831002-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c

index 527da58c79535b5895d5c2045409279174c2d506..5b7e6f4b5c7ea10efd4b47eb621726482e83bb72 100644 (file)
@@ -2142,7 +2142,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
        /* Panic tear down fw command will stop the PCI bus communication
         * with the HCA, so the health poll is no longer needed.
         */
-       mlx5_drain_health_wq(dev);
        mlx5_stop_health_poll(dev, false);
 
        ret = mlx5_cmd_fast_teardown_hca(dev);
@@ -2177,6 +2176,7 @@ static void shutdown(struct pci_dev *pdev)
 
        mlx5_core_info(dev, "Shutdown was called\n");
        set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
+       mlx5_drain_health_wq(dev);
        err = mlx5_try_fast_unload(dev);
        if (err)
                mlx5_unload_one(dev, false);
index b2986175d9afe80abb0e109fcfe56590f14b0cf7..b706f1486504a7c90049d9071c6541a7016067de 100644 (file)
@@ -112,6 +112,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
        struct mlx5_core_dev *mdev = sf_dev->mdev;
 
        set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
+       mlx5_drain_health_wq(mdev);
        mlx5_unload_one(mdev, false);
 }