From 58030c76cce473b6cfd630bbecb97215def0dff8 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 5 Jun 2023 13:33:23 +0300 Subject: RDMA/cma: Always set static rate to 0 for RoCE Set static rate to 0 as it should be discovered by path query and has no meaning for RoCE. This also avoid of using the rtnl lock and ethtool API, which is a bottleneck when try to setup many rdma-cm connections at the same time, especially with multiple processes. Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/f72a4f8b667b803aee9fa794069f61afb5839ce4.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_addr.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index d808dc3d239e..811a0f11d0db 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -194,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) return 0; } -static inline int iboe_get_rate(struct net_device *dev) -{ - struct ethtool_link_ksettings cmd; - int err; - - rtnl_lock(); - err = __ethtool_get_link_ksettings(dev, &cmd); - rtnl_unlock(); - if (err) - return IB_RATE_PORT_CURRENT; - - if (cmd.base.speed >= 40000) - return IB_RATE_40_GBPS; - else if (cmd.base.speed >= 30000) - return IB_RATE_30_GBPS; - else if (cmd.base.speed >= 20000) - return IB_RATE_20_GBPS; - else if (cmd.base.speed >= 10000) - return IB_RATE_10_GBPS; - else - return IB_RATE_PORT_CURRENT; -} - static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && -- cgit v1.2.3 From 617f5db1a626f18d5cbb7c7faf7bf8f9ea12be78 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 5 Jun 2023 13:33:26 +0300 Subject: RDMA/mlx5: Fix affinity assignment The cited commit aimed to ensure that Virtual Functions (VFs) assign a queue affinity to a Queue Pair (QP) to distribute traffic when the LAG master creates a hardware LAG. If the affinity was set while the hardware was not in LAG, the firmware would ignore the affinity value. However, this commit unintentionally assigned an affinity to QPs on the LAG master's VPORT even if the RDMA device was not marked as LAG-enabled. In most cases, this was not an issue because when the hardware entered hardware LAG configuration, the RDMA device of the LAG master would be destroyed and a new one would be created, marked as LAG-enabled. The problem arises when a user configures Equal-Cost Multipath (ECMP). In ECMP mode, traffic can be directed to different physical ports based on the queue affinity, which is intended for use by VPORTS other than the E-Switch manager. ECMP mode is supported only if both E-Switch managers are in switchdev mode and the appropriate route is configured via IP. In this configuration, the RDMA device is not destroyed, and we retain the RDMA device that is not marked as LAG-enabled. To ensure correct behavior, Send Queues (SQs) opened by the E-Switch manager through verbs should be assigned strict affinity. This means they will only be able to communicate through the native physical port associated with the E-Switch manager. This will prevent the firmware from assigning affinity and will not allow the SQs to be remapped in case of failover. Fixes: 802dcc7fc5ec ("RDMA/mlx5: Support TX port affinity for VF drivers in LAG mode") Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Link: https://lore.kernel.org/r/425b05f4da840bc684b0f7e8ebf61aeb5cef09b0.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 12 ------------ include/linux/mlx5/driver.h | 12 ++++++++++++ 4 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 91fc0cdf377d..2dfa6f49a6f4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1598,6 +1598,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass)) return 0; + if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active) + return 0; + return dev->lag_active || (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 && MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity)); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 70ca8ffa9256..78b96bfb4e6a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (!mlx5_ib_lag_should_assign_affinity(dev) && + mlx5_lag_is_lacp_owner(dev->mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); if (qp->flags & IB_QP_CREATE_SOURCE_QPN) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa..229520405d4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -276,18 +276,6 @@ static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return pci_num_vf(dev->pdev) ? true : false; } -static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) -{ - /* LACP owner conditions: - * 1) Function is physical. - * 2) LAG is supported by FW. - * 3) LAG is managed by driver (currently the only option). - */ - return MLX5_CAP_GEN(dev, vport_group_manager) && - (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && - MLX5_CAP_GEN(dev, lag_master); -} - int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev); static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4c4f737f9c1..8ad16b779898 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1237,6 +1237,18 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). + */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { -- cgit v1.2.3