- u32
- driverinit
- Control the size (in packets) of the hairpin queues.
+   * - ``pcie_cong_inbound_high``
+     - u16
+     - driverinit
+     - High threshold configuration for PCIe congestion events. The firmware
+       will send an event once device-side inbound PCIe bandwidth utilization
+       stays above the configured high threshold for a long enough period
+       (at least 200ms).
+
+       See the ``pci_bw_inbound_high`` ethtool stat.
+
+       Units are 0.01%. Accepted values are in the range [0, 10000],
+       and the value must satisfy
+       ``pcie_cong_inbound_low`` < ``pcie_cong_inbound_high``.
+       Default value: 9000 (corresponds to 90%).
+   * - ``pcie_cong_inbound_low``
+     - u16
+     - driverinit
+     - Low threshold configuration for PCIe congestion events. The firmware
+       will send an event once device-side inbound PCIe bandwidth utilization
+       drops below the configured low threshold, but only after having
+       previously been in a congested state.
+
+       See the ``pci_bw_inbound_low`` ethtool stat.
+
+       Units are 0.01%. Accepted values are in the range [0, 10000],
+       and the value must satisfy
+       ``pcie_cong_inbound_low`` < ``pcie_cong_inbound_high``.
+       Default value: 7500 (corresponds to 75%).
+   * - ``pcie_cong_outbound_high``
+     - u16
+     - driverinit
+     - High threshold configuration for PCIe congestion events. The firmware
+       will send an event once device-side outbound PCIe bandwidth utilization
+       stays above the configured high threshold for a long enough period
+       (at least 200ms).
+
+       See the ``pci_bw_outbound_high`` ethtool stat.
+
+       Units are 0.01%. Accepted values are in the range [0, 10000],
+       and the value must satisfy
+       ``pcie_cong_outbound_low`` < ``pcie_cong_outbound_high``.
+       Default value: 9000 (corresponds to 90%).
+   * - ``pcie_cong_outbound_low``
+     - u16
+     - driverinit
+     - Low threshold configuration for PCIe congestion events. The firmware
+       will send an event once device-side outbound PCIe bandwidth utilization
+       drops below the configured low threshold, but only after having
+       previously been in a congested state.
+
+       See the ``pci_bw_outbound_low`` ethtool stat.
+
+       Units are 0.01%. Accepted values are in the range [0, 10000],
+       and the value must satisfy
+       ``pcie_cong_outbound_low`` < ``pcie_cong_outbound_high``.
+       Default value: 7500 (corresponds to 75%).
* - ``cqe_compress_type``
- string
ARRAY_SIZE(mlx5_devlink_eth_params));
}
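+/* Threshold units are 0.01%, i.e. 10000 corresponds to 100% of PCIe
+ * bandwidth utilization. The defaults below give the 90% (high) / 75%
+ * (low) hysteresis pair described in the devlink documentation.
+ */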
+#define MLX5_PCIE_CONG_THRESH_MAX 10000
+#define MLX5_PCIE_CONG_THRESH_DEF_LOW 7500
+#define MLX5_PCIE_CONG_THRESH_DEF_HIGH 9000
+
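+/* Range-check a single threshold value. The cross-parameter constraint
+ * (low < high) is enforced later, when the congestion event object is
+ * created; see mlx5e_thresh_config_validate().
+ */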
+static int
+mlx5_devlink_pcie_cong_thresh_validate(struct devlink *devl, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ if (val.vu16 > MLX5_PCIE_CONG_THRESH_MAX) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Value %u > max supported (%u)",
+ val.vu16, MLX5_PCIE_CONG_THRESH_MAX);
+
+ return -EINVAL;
+ }
+
+ switch (id) {
+ case MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW:
+ case MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH:
+ case MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW:
+ case MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void mlx5_devlink_pcie_cong_init_values(struct devlink *devlink)
+{
+ union devlink_param_value value;
+ u32 id;
+
+ value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_LOW;
+ id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW;
+ devl_param_driverinit_value_set(devlink, id, value);
+
+ value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_HIGH;
+ id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH;
+ devl_param_driverinit_value_set(devlink, id, value);
+
+ value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_LOW;
+ id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW;
+ devl_param_driverinit_value_set(devlink, id, value);
+
+ value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_HIGH;
+ id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH;
+ devl_param_driverinit_value_set(devlink, id, value);
+}
+
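+/* Sketch of expected usage from userspace (hypothetical PCI address);
+ * driverinit values take effect on the next devlink reload:
+ *
+ *   devlink dev param set pci/0000:08:00.0 \
+ *           name pcie_cong_inbound_high value 9500 cmode driverinit
+ *   devlink dev reload pci/0000:08:00.0
+ */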
+static const struct devlink_param mlx5_devlink_pcie_cong_params[] = {
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW,
+ "pcie_cong_inbound_low", DEVLINK_PARAM_TYPE_U16,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_pcie_cong_thresh_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH,
+ "pcie_cong_inbound_high", DEVLINK_PARAM_TYPE_U16,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_pcie_cong_thresh_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW,
+ "pcie_cong_outbound_low", DEVLINK_PARAM_TYPE_U16,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_pcie_cong_thresh_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH,
+ "pcie_cong_outbound_high", DEVLINK_PARAM_TYPE_U16,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_pcie_cong_thresh_validate),
+};
+
+static int mlx5_devlink_pcie_cong_params_register(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
+
+ if (!mlx5_pcie_cong_event_supported(dev))
+ return 0;
+
+ err = devl_params_register(devlink, mlx5_devlink_pcie_cong_params,
+ ARRAY_SIZE(mlx5_devlink_pcie_cong_params));
+ if (err)
+ return err;
+
+ mlx5_devlink_pcie_cong_init_values(devlink);
+
+ return 0;
+}
+
+static void mlx5_devlink_pcie_cong_params_unregister(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!mlx5_pcie_cong_event_supported(dev))
+ return;
+
+ devl_params_unregister(devlink, mlx5_devlink_pcie_cong_params,
+ ARRAY_SIZE(mlx5_devlink_pcie_cong_params));
+}
+
static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id,
union devlink_param_value val,
struct netlink_ext_ack *extack)
if (err)
goto max_uc_list_err;
+ err = mlx5_devlink_pcie_cong_params_register(devlink);
+ if (err)
+ goto pcie_cong_err;
+
err = mlx5_nv_param_register_dl_params(devlink);
if (err)
goto nv_param_err;
return 0;
nv_param_err:
+ mlx5_devlink_pcie_cong_params_unregister(devlink);
+pcie_cong_err:
mlx5_devlink_max_uc_list_params_unregister(devlink);
max_uc_list_err:
mlx5_devlink_auxdev_params_unregister(devlink);
void mlx5_devlink_params_unregister(struct devlink *devlink)
{
mlx5_nv_param_unregister_dl_params(devlink);
+ mlx5_devlink_pcie_cong_params_unregister(devlink);
mlx5_devlink_max_uc_list_params_unregister(devlink);
mlx5_devlink_auxdev_params_unregister(devlink);
devl_params_unregister(devlink, mlx5_devlink_params,
MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT,
MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
+ MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW,
+ MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH,
+ MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW,
+ MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH,
MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE
};
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+#include "../devlink.h"
#include "en.h"
#include "pcie_cong_event.h"
struct mlx5e_pcie_cong_stats stats;
};
-/* In units of 0.01 % */
-static const struct mlx5e_pcie_cong_thresh default_thresh_config = {
- .inbound_high = 9000,
- .inbound_low = 7500,
- .outbound_high = 9000,
- .outbound_low = 7500,
-};
static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
return NOTIFY_OK;
}
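+/* Read the four thresholds from the devlink driverinit params; these
+ * hold either the values latched at the last devlink reload or the
+ * defaults set when the params were registered.
+ */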
+static int
+mlx5e_pcie_cong_get_thresh_config(struct mlx5_core_dev *dev,
+ struct mlx5e_pcie_cong_thresh *config)
+{
+	u32 ids[] = {
+		MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW,
+		MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH,
+		MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW,
+		MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH,
+	};
+	struct devlink *devlink = priv_to_devlink(dev);
+	union devlink_param_value val[ARRAY_SIZE(ids)];
+
+	for (int i = 0; i < ARRAY_SIZE(ids); i++) {
+		u32 id = ids[i];
+		int err;
+
+		err = devl_param_driverinit_value_get(devlink, id, &val[i]);
+		if (err)
+			return err;
+	}
+
+ config->inbound_low = val[0].vu16;
+ config->inbound_high = val[1].vu16;
+ config->outbound_low = val[2].vu16;
+ config->outbound_high = val[3].vu16;
+
+ return 0;
+}
+
+static int
+mlx5e_thresh_config_validate(struct mlx5_core_dev *mdev,
+ const struct mlx5e_pcie_cong_thresh *config)
+{
+ int err = 0;
+
+ if (config->inbound_low >= config->inbound_high) {
+ err = -EINVAL;
+ mlx5_core_err(mdev, "PCIe inbound congestion threshold configuration invalid: low (%u) >= high (%u).\n",
+ config->inbound_low, config->inbound_high);
+ }
+
+ if (config->outbound_low >= config->outbound_high) {
+ err = -EINVAL;
+ mlx5_core_err(mdev, "PCIe outbound congestion threshold configuration invalid: low (%u) >= high (%u).\n",
+ config->outbound_low, config->outbound_high);
+ }
+
+ return err;
+}
+
int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
{
+ struct mlx5e_pcie_cong_thresh thresh_config = {};
struct mlx5e_pcie_cong_event *cong_event;
struct mlx5_core_dev *mdev = priv->mdev;
int err;
if (!mlx5_pcie_cong_event_supported(mdev))
return 0;
+ err = mlx5e_pcie_cong_get_thresh_config(mdev, &thresh_config);
+ if (WARN_ON(err))
+ return err;
+
+ err = mlx5e_thresh_config_validate(mdev, &thresh_config);
+ if (err) {
+ mlx5_core_err(mdev, "PCIe congestion event feature disabled\n");
+ return err;
+ }
+
cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL,
mdev->priv.numa_node);
if (!cong_event)
cong_event->priv = priv;
- err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config,
+ err = mlx5_cmd_pcie_cong_event_set(mdev, &thresh_config,
&cong_event->obj_id);
if (err) {
mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");