]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
net/mlx5: Fix command bitmask initialization
authorShay Drory <shayd@nvidia.com>
Tue, 15 Oct 2024 09:32:06 +0000 (12:32 +0300)
committerPaolo Abeni <pabeni@redhat.com>
Thu, 17 Oct 2024 10:14:07 +0000 (12:14 +0200)
Command bitmask have a dedicated bit for MANAGE_PAGES command, this bit
isn't Initialize during command bitmask Initialization, only during
MANAGE_PAGES.

In addition, mlx5_cmd_trigger_completions() is trying to trigger
completion for MANAGE_PAGES command as well.

Hence, in case health error occurred before any MANAGE_PAGES command
have been invoke (for example, during mlx5_enable_hca()),
mlx5_cmd_trigger_completions() will try to trigger completion for
MANAGE_PAGES command, which will result in null-ptr-deref error.[1]

Fix it by Initialize command bitmask correctly.

While at it, re-write the code for better understanding.

[1]
BUG: KASAN: null-ptr-deref in mlx5_cmd_trigger_completions+0x1db/0x600 [mlx5_core]
Write of size 4 at addr 0000000000000214 by task kworker/u96:2/12078
CPU: 10 PID: 12078 Comm: kworker/u96:2 Not tainted 6.9.0-rc2_for_upstream_debug_2024_04_07_19_01 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Workqueue: mlx5_health0000:08:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
Call Trace:
 <TASK>
 dump_stack_lvl+0x7e/0xc0
 kasan_report+0xb9/0xf0
 kasan_check_range+0xec/0x190
 mlx5_cmd_trigger_completions+0x1db/0x600 [mlx5_core]
 mlx5_cmd_flush+0x94/0x240 [mlx5_core]
 enter_error_state+0x6c/0xd0 [mlx5_core]
 mlx5_fw_fatal_reporter_err_work+0xf3/0x480 [mlx5_core]
 process_one_work+0x787/0x1490
 ? lockdep_hardirqs_on_prepare+0x400/0x400
 ? pwq_dec_nr_in_flight+0xda0/0xda0
 ? assign_work+0x168/0x240
 worker_thread+0x586/0xd30
 ? rescuer_thread+0xae0/0xae0
 kthread+0x2df/0x3b0
 ? kthread_complete_and_exit+0x20/0x20
 ret_from_fork+0x2d/0x70
 ? kthread_complete_and_exit+0x20/0x20
 ret_from_fork_asm+0x11/0x20
 </TASK>

Fixes: 9b98d395b85d ("net/mlx5: Start health poll at earlier stage of driver load")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
drivers/net/ethernet/mellanox/mlx5/core/cmd.c

index a64d96effb9eacc9640b704952f20d9ffbd17318..6bd8a18e3af3a1c6c78bd299e1811dd9b167125c 100644 (file)
@@ -1765,6 +1765,10 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
        }
 }
 
+#define MLX5_MAX_MANAGE_PAGES_CMD_ENT 1
+#define MLX5_CMD_MASK ((1UL << (cmd->vars.max_reg_cmds + \
+                          MLX5_MAX_MANAGE_PAGES_CMD_ENT)) - 1)
+
 static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
@@ -1776,7 +1780,7 @@ static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
        /* wait for pending handlers to complete */
        mlx5_eq_synchronize_cmd_irq(dev);
        spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
-       vector = ~dev->cmd.vars.bitmask & ((1ul << (1 << dev->cmd.vars.log_sz)) - 1);
+       vector = ~dev->cmd.vars.bitmask & MLX5_CMD_MASK;
        if (!vector)
                goto no_trig;
 
@@ -2361,7 +2365,7 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev)
 
        cmd->state = MLX5_CMDIF_STATE_DOWN;
        cmd->vars.max_reg_cmds = (1 << cmd->vars.log_sz) - 1;
-       cmd->vars.bitmask = (1UL << cmd->vars.max_reg_cmds) - 1;
+       cmd->vars.bitmask = MLX5_CMD_MASK;
 
        sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds);
        sema_init(&cmd->vars.pages_sem, 1);