The cited patch change register devlink flow, and neglect to reflect
the changes for peer devlink set logic. Peer devlink set is
triggering a call trace if done after devl_register.[1]
Hence, align peer devlink set logic with register devlink flow.
[1]
WARNING: CPU: 4 PID: 3394 at net/devlink/core.c:155 devlink_rel_nested_in_add+0x177/0x180
CPU: 4 PID: 3394 Comm: kworker/u40:1 Not tainted 6.9.0-rc4_for_linust_min_debug_2024_04_16_14_08 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Workqueue: mlx5_vhca_event0 mlx5_vhca_state_work_handler [mlx5_core]
RIP: 0010:devlink_rel_nested_in_add+0x177/0x180
Call Trace:
<TASK>
? __warn+0x78/0x120
? devlink_rel_nested_in_add+0x177/0x180
? report_bug+0x16d/0x180
? handle_bug+0x3c/0x60
? exc_invalid_op+0x14/0x70
? asm_exc_invalid_op+0x16/0x20
? devlink_port_init+0x30/0x30
? devlink_port_type_clear+0x50/0x50
? devlink_rel_nested_in_add+0x177/0x180
? devlink_rel_nested_in_add+0xdd/0x180
mlx5_sf_mdev_event+0x74/0xb0 [mlx5_core]
notifier_call_chain+0x35/0xb0
blocking_notifier_call_chain+0x3d/0x60
mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core]
mlx5_sf_dev_probe+0x185/0x3e0 [mlx5_core]
auxiliary_bus_probe+0x38/0x80
? driver_sysfs_add+0x51/0x80
really_probe+0xc5/0x3a0
? driver_probe_device+0x90/0x90
__driver_probe_device+0x80/0x160
driver_probe_device+0x1e/0x90
__device_attach_driver+0x7d/0x100
bus_for_each_drv+0x80/0xd0
__device_attach+0xbc/0x1f0
bus_probe_device+0x86/0xa0
device_add+0x64f/0x860
__auxiliary_device_add+0x3b/0xa0
mlx5_sf_dev_add+0x139/0x330 [mlx5_core]
mlx5_sf_dev_state_change_handler+0x1e4/0x250 [mlx5_core]
notifier_call_chain+0x35/0xb0
blocking_notifier_call_chain+0x3d/0x60
mlx5_vhca_state_work_handler+0x151/0x200 [mlx5_core]
process_one_work+0x13f/0x2e0
worker_thread+0x2bd/0x3c0
? rescuer_thread+0x410/0x410
kthread+0xc4/0xf0
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x2d/0x50
? kthread_complete_and_exit+0x20/0x20
ret_from_fork_asm+0x11/0x20
</TASK>
Fixes: bf729988303a ("net/mlx5: Restore mistakenly dropped parts in register devlink flow")
Fixes: c6e77aa9dd82 ("net/mlx5: Register devlink first under devlink lock")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240509112951.590184-3-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
struct devlink *devlink = priv_to_devlink(dev);
int err;
+ devl_lock(devlink);
+ devl_register(devlink);
dev->state = MLX5_DEVICE_STATE_UP;
err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
if (err) {
goto query_hca_caps_err;
}
- devl_lock(devlink);
- devl_register(devlink);
-
err = mlx5_devlink_params_register(priv_to_devlink(dev));
if (err) {
mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
- goto params_reg_err;
+ goto query_hca_caps_err;
}
devl_unlock(devlink);
return 0;
-params_reg_err:
- devl_unregister(devlink);
- devl_unlock(devlink);
query_hca_caps_err:
- devl_unregister(devlink);
- devl_unlock(devlink);
mlx5_function_disable(dev, true);
out:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ devl_unregister(devlink);
+ devl_unlock(devlink);
return err;
}
goto remap_err;
}
+ /* Peer devlink logic expects to work on unregistered devlink instance. */
+ err = mlx5_core_peer_devlink_set(sf_dev, devlink);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err);
+ goto peer_devlink_set_err;
+ }
+
if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev))
err = mlx5_init_one_light(mdev);
else
goto init_one_err;
}
- err = mlx5_core_peer_devlink_set(sf_dev, devlink);
- if (err) {
- mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err);
- goto peer_devlink_set_err;
- }
-
return 0;
-peer_devlink_set_err:
- if (mlx5_dev_is_lightweight(sf_dev->mdev))
- mlx5_uninit_one_light(sf_dev->mdev);
- else
- mlx5_uninit_one(sf_dev->mdev);
init_one_err:
+peer_devlink_set_err:
iounmap(mdev->iseg);
remap_err:
mlx5_mdev_uninit(mdev);