summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
diff options
context:
space:
mode:
authorDmytro Linkin <dlinkin@nvidia.com>2021-06-01 12:37:39 +0300
committerSaeed Mahameed <saeedm@nvidia.com>2021-08-20 07:50:40 +0300
commit0fe132eac38cf20fc584c7537e62a11415788269 (patch)
tree7341f6428c66ba568750d0254c01377a69b48d1e /drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
parentf47e04eb96e02e6bd870dd5ce5da1d612b43b28d (diff)
downloadlinux-0fe132eac38cf20fc584c7537e62a11415788269.tar.xz
net/mlx5: E-switch, Allow to add vports to rate groups
Implement eswitch API that allows updating rate groups. If group pointer is NULL, then move the vport to internal unlimited group zero. Implement devlink_ops->rate_parent_node_set() callback in the terms of the new eswitch group update API. Enable QoS for all group's elements if a group has allocated BW share. Co-developed-by: Vlad Buslov <vladbu@nvidia.com> Signed-off-by: Vlad Buslov <vladbu@nvidia.com> Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com> Reviewed-by: Huy Nguyen <huyn@nvidia.com> Reviewed-by: Mark Bloch <mbloch@nvidia.com> Reviewed-by: Parav Pandit <parav@nvidia.com> Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c206
1 files changed, 183 insertions, 23 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 138b11073278..692c9d543f75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -63,20 +63,23 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
void *vport_elem;
int err;
if (!vport->qos.enabled)
return -EIO;
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
element_attributes);
MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- err = esw_qos_tsar_config(dev, sched_ctx, esw->qos.root_tsar_ix, vport->qos.esw_tsar_ix,
+ err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
max_rate, bw_share);
if (err) {
esw_warn(esw->dev,
@@ -109,7 +112,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
} else {
mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled || !evport->qos.enabled ||
- evport->qos.min_rate < max_guarantee)
+ evport->qos.group != group || evport->qos.min_rate < max_guarantee)
continue;
max_guarantee = evport->qos.min_rate;
}
@@ -117,6 +120,12 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
if (max_guarantee)
return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+ /* If vports min rate divider is 0 but their group has bw_share configured, then
+ * need to set bw_share for vports to minimal value.
+ */
+ if (!group_level && !max_guarantee && group->bw_share)
+ return 1;
return 0;
}
@@ -140,7 +149,7 @@ static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
int err;
mlx5_esw_for_each_vport(esw, i, evport) {
- if (!evport->enabled || !evport->qos.enabled)
+ if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
continue;
bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
@@ -176,6 +185,14 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid
return err;
group->bw_share = bw_share;
+
+ /* All the group's vports need to be set with default bw_share
+ * to enable them with QOS
+ */
+ err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+
+ if (err)
+ return err;
}
return 0;
@@ -201,7 +218,7 @@ int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
previous_min_rate = evport->qos.min_rate;
evport->qos.min_rate = min_rate;
- err = esw_qos_normalize_vports_min_rate(esw, NULL, extack);
+ err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
if (err)
evport->qos.min_rate = previous_min_rate;
@@ -213,6 +230,7 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
u32 max_rate,
struct netlink_ext_ack *extack)
{
+ u32 act_max_rate = max_rate;
bool max_rate_supported;
int err;
@@ -224,7 +242,13 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
if (max_rate == evport->qos.max_rate)
return 0;
- err = esw_qos_vport_config(esw, evport, max_rate, evport->qos.bw_share, extack);
+ /* If parent group has rate limit need to set to group
+ * value when new max rate is 0.
+ */
+ if (evport->qos.group && !max_rate)
+ act_max_rate = evport->qos.group->max_rate;
+
+ err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
if (!err)
evport->qos.max_rate = max_rate;
@@ -267,6 +291,8 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
struct mlx5_esw_rate_group *group,
u32 max_rate, struct netlink_ext_ack *extack)
{
+ struct mlx5_vport *vport;
+ unsigned long i;
int err;
if (group->max_rate == max_rate)
@@ -278,9 +304,127 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
group->max_rate = max_rate;
+ /* Any unlimited vports in the group should be set
+ * with the value of the group.
+ */
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport->enabled || !vport->qos.enabled ||
+ vport->qos.group != group || vport->qos.max_rate)
+ continue;
+
+ err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch vport implicit rate limit setting failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *curr_group,
+ struct mlx5_esw_rate_group *new_group,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_rate;
+ int err;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+ return err;
+ }
+
+ vport->qos.group = new_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
+
+ /* If vport is unlimited, we set the group's value.
+ * Therefore, if the group is limited it will apply to
+ * the vport as well and if not, vport will remain unlimited.
+ */
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ vport->qos.group = curr_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
+ if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
+ esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+ vport->vport);
+
return err;
}
+static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *new_group, *curr_group;
+ int err;
+
+ if (!vport->enabled)
+ return -EINVAL;
+
+ curr_group = vport->qos.group;
+ new_group = group ?: esw->qos.group0;
+ if (curr_group == new_group)
+ return 0;
+
+ err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+ if (err)
+ return err;
+
+ /* Recalculate bw share weights of old and new groups */
+ if (vport->qos.bw_share) {
+ esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+ esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+ }
+
+ return 0;
+}
+
static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
@@ -457,9 +601,6 @@ unlock:
int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
u32 max_rate, u32 bw_share)
{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
int err;
lockdep_assert_held(&esw->state_lock);
@@ -469,22 +610,10 @@ int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport
if (vport->qos.enabled)
return -EEXIST;
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_ix);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ vport->qos.group = esw->qos.group0;
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- &vport->qos.esw_tsar_ix);
- if (err)
- esw_warn(dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
- else
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+ if (!err)
vport->qos.enabled = true;
return err;
@@ -497,6 +626,8 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
lockdep_assert_held(&esw->state_lock);
if (!esw->qos.enabled || !vport->qos.enabled)
return;
+ WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+ "Disabling QoS on port before detaching it from group");
err = mlx5_destroy_scheduling_element_cmd(esw->dev,
SCHEDULING_HIERARCHY_E_SWITCH,
@@ -696,3 +827,32 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
mutex_unlock(&esw->state_lock);
return err;
}
+
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_update_group(esw, vport, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_vport *vport = priv;
+
+ if (!parent)
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+ vport, NULL, extack);
+
+ group = parent_priv;
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+}