From 3410fbcd47dc6479af4309febf760ccaa5efb472 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 12 May 2021 13:52:27 +0300 Subject: {net, RDMA}/mlx5: Fix override of log_max_qp by other device mlx5_core_dev holds pointer to static profile, hence when the log_max_qp of the profile is override by some device, then it effect all other mlx5 devices that share the same profile. Fix it by having a profile instance for every mlx5 device. Fixes: 883371c453b9 ("net/mlx5: Check FW limitations on log_max_qp before setting it") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c114365eb126..a1d67bd7fb43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -503,7 +503,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { - struct mlx5_profile *prof = dev->profile; + struct mlx5_profile *prof = &dev->profile; void *set_hca_cap; int err; @@ -524,11 +524,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) to_fw_pkey_sz(dev, 128)); /* Check log_max_qp from HCA caps to set in current profile */ - if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", - profile[prof_sel].log_max_qp, + prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); - profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, @@ -1381,8 +1381,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) struct mlx5_priv *priv = &dev->priv; int err; - dev->profile = &profile[profile_idx]; - + memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&dev->intf_state_mutex); -- cgit v1.2.3 From dca59f4a791960ec73fa15803faa0abe0f92ece2 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: net/mlx5e: Fix nullptr in add_vlan_push_action() The result of dev_get_by_index_rcu() is not checked for NULL and then gets dereferenced immediately. Also, the RCU lock must be held by the caller of dev_get_by_index_rcu(), which isn't satisfied by the call stack. Fix by handling nullptr return value when iflink device is not found. Add RCU locking around dev_get_by_index_rcu() to avoid possible adverse effects while iterating over the net_device's hlist. It is safe not to increment reference count of the net_device pointer in case of a successful lookup, because it's already handled by VLAN code during VLAN device registration (see register_vlan_dev and netdev_upper_dev_link). Fixes: 278748a95aa3 ("net/mlx5e: Offload TC e-switch rules with egress VLAN device") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 47a9c49b25fd..46945d04b5b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3526,8 +3526,12 @@ static int add_vlan_push_action(struct mlx5e_priv *priv, if (err) return err; - *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), - dev_get_iflink(vlan_dev)); + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + if (is_vlan_dev(*out_dev)) err = add_vlan_push_action(priv, attr, out_dev, action); -- cgit v1.2.3 From 442b3d7b671bcb779ebdad46edd08051eb8b28d9 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 30 Apr 2021 06:58:29 +0000 Subject: net/mlx5: Set reformat action when needed for termination rules For remote mirroring, after the tunnel packets are received, they are decapsulated and sent to representor, then re-encapsulated and sent out over another tunnel. So reformat action is set only when the destination is required to do encapsulation. Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink") Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 31 +++++++--------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index a81ece94f599..e3e7fdd396ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -172,19 +172,6 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, } } -static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt) -{ - switch (rt->reformat_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: - case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - return true; - default: - return false; - } -} - static void mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, struct mlx5_flow_act *dst) @@ -202,14 +189,6 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, memset(&src->vlan[1], 0, sizeof(src->vlan[1])); } } - - if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && - mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) { - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - dst->pkt_reformat = src->pkt_reformat; - src->pkt_reformat = NULL; - } } static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, @@ -279,6 +258,14 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) continue; + if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) { + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat; + } else { + term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = NULL; + } + /* get the terminating table for the action list */ tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, &dest[i], attr); @@ -301,6 +288,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, goto revert_changes; /* create the FTE */ + flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = NULL; rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); if (IS_ERR(rule)) goto revert_changes; -- cgit v1.2.3 From fca086617af864efd20289774901221b2df06b39 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 13 May 2021 15:00:53 +0300 Subject: net/mlx5: Fix err prints and return when creating termination table Fix print to print correct error code and not using IS_ERR() which will just result in always printing 1. Also return real err instead of always -EOPNOTSUPP. Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index e3e7fdd396ad..d61bee2d35fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -65,7 +65,7 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; - int err; + int err, err2; root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { @@ -83,26 +83,26 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, ft_attr.autogroup.max_num_groups = 1; tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { - esw_warn(dev, "Failed to create termination table (error %d)\n", - IS_ERR(tt->termtbl)); - return -EOPNOTSUPP; + err = PTR_ERR(tt->termtbl); + esw_warn(dev, "Failed to create termination table, err %pe\n", tt->termtbl); + return err; } tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); if (IS_ERR(tt->rule)) { - esw_warn(dev, "Failed to create termination table rule (error %d)\n", - IS_ERR(tt->rule)); + err = PTR_ERR(tt->rule); + esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule); goto add_flow_err; } return 0; add_flow_err: - err = mlx5_destroy_flow_table(tt->termtbl); - if (err) - esw_warn(dev, "Failed to destroy termination table\n"); + err2 = mlx5_destroy_flow_table(tt->termtbl); + if (err2) + esw_warn(dev, "Failed to destroy termination table, err %d\n", err2); - return -EOPNOTSUPP; + return err; } static struct mlx5_termtbl_handle * @@ -270,8 +270,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, &dest[i], attr); if (IS_ERR(tt)) { - esw_warn(esw->dev, "Failed to get termination table (error %d)\n", - IS_ERR(tt)); + esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt); goto revert_changes; } attr->dests[num_vport_dests].termtbl = tt; -- cgit v1.2.3 From 82041634d96e87b41c600a673f10150d9f21f742 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 7 May 2021 10:08:47 +0300 Subject: net/mlx5: SF, Fix show state inactive when its inactivated When a SF is inactivated and when it is in a TEARDOWN_REQUEST state, driver still returns its state as active. This is incorrect. Fix it by treating TEARDOWN_REQEUST as inactive state. When a SF is still attached to the driver, on user request to reactivate EINVAL error is returned. Inform user about it with better code EBUSY and informative error message. Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support") Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index a8e73c9ed1ea..1be048769309 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -136,10 +136,10 @@ static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state) switch (hw_state) { case MLX5_VHCA_STATE_ACTIVE: case MLX5_VHCA_STATE_IN_USE: - case MLX5_VHCA_STATE_TEARDOWN_REQUEST: return DEVLINK_PORT_FN_STATE_ACTIVE; case MLX5_VHCA_STATE_INVALID: case MLX5_VHCA_STATE_ALLOCATED: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: default: return DEVLINK_PORT_FN_STATE_INACTIVE; } @@ -192,14 +192,17 @@ sf_err: return err; } -static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf, + struct netlink_ext_ack *extack) { int err; if (mlx5_sf_is_active(sf)) return 0; - if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) - return -EINVAL; + if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) { + NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached"); + return -EBUSY; + } err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id); if (err) @@ -226,7 +229,8 @@ static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, struct mlx5_sf *sf, - enum devlink_port_fn_state state) + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) { int err = 0; @@ -234,7 +238,7 @@ static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *ta if (state == mlx5_sf_to_devlink_state(sf->hw_state)) goto out; if (state == DEVLINK_PORT_FN_STATE_ACTIVE) - err = mlx5_sf_activate(dev, sf); + err = mlx5_sf_activate(dev, sf, extack); else if (state == DEVLINK_PORT_FN_STATE_INACTIVE) err = mlx5_sf_deactivate(dev, sf); else @@ -265,7 +269,7 @@ int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_po goto out; } - err = mlx5_sf_state_set(dev, table, sf, state); + err = mlx5_sf_state_set(dev, table, sf, state, extack); out: mlx5_sf_table_put(table); return err; -- cgit v1.2.3 From fe7738eb3ca3631a75844e790f6cb576c0fe7b00 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: net/mlx5e: Fix nullptr in mlx5e_tc_add_fdb_flow() The result of __dev_get_by_index() is not checked for NULL, which then passed to mlx5e_attach_encap() and gets dereferenced. Also, in case of a successful lookup, the net_device reference count is not incremented, which may result in net_device pointer becoming invalid at any time during mlx5e_attach_encap() execution. Fix by using dev_get_by_index(), which does proper reference counting on the net_device pointer. Also, handle nullptr return value when mirred device is not found. It's safe to call dev_put() on the mirred net_device pointer, right after mlx5e_attach_encap() call, because it's not being saved/copied down the call chain. Fixes: 3c37745ec614 ("net/mlx5e: Properly deal with encap flows add/del under neigh update") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 46945d04b5b8..882bafba43f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1322,10 +1322,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *out_dev, *encap_dev = NULL; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; bool vf_tun = false, encap_valid = true; + struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; @@ -1371,16 +1371,22 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr = attr->esw_attr; for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; int mirred_ifindex; if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) continue; mirred_ifindex = parse_attr->mirred_ifindex[out_index]; - out_dev = __dev_get_by_index(dev_net(priv->netdev), - mirred_ifindex); + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto err_out; + } err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); + dev_put(out_dev); if (err) goto err_out; -- cgit v1.2.3 From 83026d83186bc48bb41ee4872f339b83f31dfc55 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 3 May 2021 18:01:02 +0300 Subject: net/mlx5e: Fix null deref accessing lag dev It could be the lag dev is null so stop processing the event. In bond_enslave() the active/backup slave being set before setting the upper dev so first event is without an upper dev. After setting the upper dev with bond_master_upper_dev_link() there is a second event and in that event we have an upper dev. Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 95f2b26a3ee3..9c076aa20306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -223,6 +223,8 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt rpriv = priv->ppriv; fwd_vport_num = rpriv->rep->vport; lag_dev = netdev_master_upper_dev_get(netdev); + if (!lag_dev) + return; netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n", lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev)); -- cgit v1.2.3 From eb96cc15926f4ddde3a28c42feeffdf002451c24 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 2 May 2021 10:25:50 +0300 Subject: net/mlx5e: Make sure fib dev exists in fib event For unreachable route entry the fib dev does not exists. Fixes: 8914add2c9e5 ("net/mlx5e: Handle FIB events to update tunnel endpoint device") Reported-by: Dennis Afanasev Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 593503bc4d07..f1fb11680d20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1505,7 +1505,7 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, fen_info = container_of(info, struct fib_entry_notifier_info, info); fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; - if (fib_dev->netdev_ops != &mlx5e_netdev_ops || + if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || fen_info->dst_len != 32) return NULL; -- cgit v1.2.3 From 77ecd10d0a8aaa6e4871d8c63626e4c9fc5e47db Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 25 Feb 2021 11:20:00 -0800 Subject: net/mlx5e: reset XPS on error flow if netdev isn't registered yet mlx5e_attach_netdev can be called prior to registering the netdevice: Example stack: ipoib_new_child_link -> ipoib_intf_init-> rdma_init_netdev-> mlx5_rdma_setup_rn-> mlx5e_attach_netdev-> mlx5e_num_channels_changed -> mlx5e_set_default_xps_cpumasks -> netif_set_xps_queue -> __netif_set_xps_queue -> kmalloc If any later stage fails at any point after mlx5e_num_channels_changed() returns, XPS allocated maps will never be freed as they are only freed during netdev unregistration, which will never happen for yet to be registered netdevs. Fixes: 3909a12e7913 ("net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases") Signed-off-by: Saeed Mahameed Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bca832cdc4cb..89937b055070 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5229,6 +5229,11 @@ static void mlx5e_update_features(struct net_device *netdev) rtnl_unlock(); } +static void mlx5e_reset_channels(struct net_device *netdev) +{ + netdev_reset_tc(netdev); +} + int mlx5e_attach_netdev(struct mlx5e_priv *priv) { const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; @@ -5283,6 +5288,7 @@ err_cleanup_tx: profile->cleanup_tx(priv); out: + mlx5e_reset_channels(priv->netdev); set_bit(MLX5E_STATE_DESTROYING, &priv->state); cancel_work_sync(&priv->update_stats_work); return err; @@ -5300,6 +5306,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) profile->cleanup_rx(priv); profile->cleanup_tx(priv); + mlx5e_reset_channels(priv->netdev); cancel_work_sync(&priv->update_stats_work); } -- cgit v1.2.3 From 97817fcc684ed01497bd19d0cd4dea699665b9cf Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Tue, 13 Apr 2021 22:43:08 +0300 Subject: net/mlx5e: Fix multipath lag activation When handling FIB_EVENT_ENTRY_REPLACE event for a new multipath route, lag activation can be missed if a stale (struct lag_mp)->mfi pointer exists, which was associated with an older multipath route that had been removed. Normally, when a route is removed, it triggers mlx5_lag_fib_event(), which handles FIB_EVENT_ENTRY_DEL and clears mfi pointer. But, if mlx5_lag_check_prereq() condition isn't met, for example when eswitch is in legacy mode, the fib event is skipped and mfi pointer becomes stale. Fix by resetting mfi pointer to NULL every time mlx5_lag_mp_init() is called. Fixes: 544fe7c2e654 ("net/mlx5e: Activate HW multipath and handle port affinity based on FIB events") Signed-off-by: Dima Chumak Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 2c41a6920264..fd6196b5e163 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -307,6 +307,11 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) struct lag_mp *mp = &ldev->lag_mp; int err; + /* always clear mfi, as it might become stale when a route delete event + * has been missed + */ + mp->mfi = NULL; + if (mp->fib_nb.notifier_call) return 0; @@ -335,4 +340,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) unregister_fib_notifier(&init_net, &mp->fib_nb); destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; + mp->mfi = NULL; } -- cgit v1.2.3 From 7d1a3d08c8a6398e7497a98cf3f7b73ea13d9939 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 20 Apr 2021 15:16:16 +0300 Subject: net/mlx5e: Reject mirroring on source port change encap rules Rules with MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE dest flag are translated to destination FT in eswitch. Currently it is not possible to mirror such rules because firmware doesn't support mixing FT and Vport destinations in single rule when one of them adds encapsulation. Since the only use case for MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE destination is support for tunnel endpoints on VF and trying to offload such rule with mirror action causes either crash in fs_core or firmware error with syndrome 0xff6a1d, reject all such rules in mlx5 TC layer. Fixes: 10742efc20a4 ("net/mlx5e: VF tunnel TX traffic offloading") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 882bafba43f2..bccdb43a880b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1399,6 +1399,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->dests[out_index].mdev = out_priv->mdev; } + if (vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto err_out; + } + err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) goto err_out; -- cgit v1.2.3 From 5e7923acbd86d0ff29269688d8a9c47ad091dd46 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 21 Apr 2021 14:26:31 +0300 Subject: net/mlx5e: Fix error path of updating netdev queues Avoid division by zero in the error flow. In the driver TC number can be either 1 or 8. When TC count is set to 1, driver zero netdev->num_tc. Hence, need to convert it back from 0 to 1 in the error flow. Fixes: fa3748775b92 ("net/mlx5e: Handle errors from netif_set_real_num_{tx,rx}_queues") Signed-off-by: Aya Levin Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 89937b055070..d1b9a4040d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2697,7 +2697,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) int err; old_num_txqs = netdev->real_num_tx_queues; - old_ntc = netdev->num_tc; + old_ntc = netdev->num_tc ? : 1; nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; -- cgit v1.2.3 From 7c9f131f366ab414691907fa0407124ea2b2f3bc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 22 Apr 2021 15:48:10 +0300 Subject: {net,vdpa}/mlx5: Configure interface MAC into mpfs L2 table net/mlx5: Expose MPFS configuration API MPFS is the multi physical function switch that bridges traffic between the physical port and any physical functions associated with it. The driver is required to add or remove MAC entries to properly forward incoming traffic to the correct physical function. We export the API to control MPFS so that other drivers, such as mlx5_vdpa are able to add MAC addresses of their network interfaces. The MAC address of the vdpa interface must be configured into the MPFS L2 address. Failing to do so could cause, in some NIC configurations, failure to forward packets to the vdpa network device instance. Fix this by adding calls to update the MPFS table. CC: CC: CC: Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h | 5 +---- drivers/vdpa/mlx5/net/mlx5_vnet.c | 19 ++++++++++++++++++- include/linux/mlx5/mpfs.h | 18 ++++++++++++++++++ 6 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 include/linux/mlx5/mpfs.h (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 0d571a0c76d9..0b75fab41ae8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "en.h" #include "en_rep.h" #include "lib/mpfs.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 570f2280823c..b88705a3a1a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "esw/acl/lgcy.h" #include "esw/legacy.h" #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index fd8449ff9e17..839a01da110f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "mlx5_core.h" #include "lib/mpfs.h" @@ -175,6 +176,7 @@ out: mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_add_mac); int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { @@ -206,3 +208,4 @@ unlock: mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_del_mac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h index 4a7b2c3203a7..4a293542a7aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -84,12 +84,9 @@ struct l2addr_node { #ifdef CONFIG_MLX5_MPFS int mlx5_mpfs_init(struct mlx5_core_dev *dev); void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); -int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); -int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); #else /* #ifndef CONFIG_MLX5_MPFS */ static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} -static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } -static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } #endif + #endif diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 189e4385df40..dda5dc6f7737 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "mlx5_vdpa.h" MODULE_AUTHOR("Eli Cohen "); @@ -1859,11 +1860,16 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb static void mlx5_vdpa_free(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_net *ndev; ndev = to_mlx5_vdpa_ndev(mvdev); free_resources(ndev); + if (!is_zero_ether_addr(ndev->config.mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); + mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); + } mlx5_vdpa_free_resources(&ndev->mvdev); mutex_destroy(&ndev->reslock); } @@ -1990,6 +1996,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; struct mlx5_core_dev *mdev; @@ -2023,10 +2030,17 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) if (err) goto err_mtu; + if (!is_zero_ether_addr(config->mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (err) + goto err_mtu; + } + mvdev->vdev.dma_dev = mdev->device; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) - goto err_mtu; + goto err_mpfs; err = alloc_resources(ndev); if (err) @@ -2044,6 +2058,9 @@ err_reg: free_resources(ndev); err_res: mlx5_vdpa_free_resources(&ndev->mvdev); +err_mpfs: + if (!is_zero_ether_addr(config->mac)) + mlx5_mpfs_del_mac(pfmdev, config->mac); err_mtu: mutex_destroy(&ndev->reslock); put_device(&mvdev->vdev.dev); diff --git a/include/linux/mlx5/mpfs.h b/include/linux/mlx5/mpfs.h new file mode 100644 index 000000000000..bf700c8d5516 --- /dev/null +++ b/include/linux/mlx5/mpfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2021 Mellanox Technologies Ltd. + */ + +#ifndef _MLX5_MPFS_ +#define _MLX5_MPFS_ + +struct mlx5_core_dev; + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif + +#endif -- cgit v1.2.3 From 75e8564e919f369cafb3d2b8fd11ec5af7b37416 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 25 Apr 2021 13:28:10 +0300 Subject: net/mlx5: Don't overwrite HCA capabilities when setting MSI-X count During driver probe of device that has dynamic MSI-X feature enabled, the following error is printed in some FW flavour (not released yet). mlx5_core 0000:06:00.0: firmware version: 4.7.4387 mlx5_core 0000:06:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link) mlx5_core 0000:06:00.0: mlx5_cmd_check:777:(pid 70599): SET_HCA_CAP(0x109) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x0) mlx5_core 0000:06:00.0: set_hca_cap:622:(pid 70599): handle_hca_cap failed mlx5_core 0000:06:00.0: mlx5_function_setup:1045:(pid 70599): set_hca_cap failed mlx5_core 0000:06:00.0: probe_one:1465:(pid 70599): mlx5_init_one failed with error code -22 mlx5_core: probe of 0000:06:00.0 failed with error -22 In order to make the setting capability of MSI-X future proof, let's query the current capabilities first. Fixes: 604774add516 ("net/mlx5: Dynamically assign MSI-X vectors count") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 1f907df5b3a2..c3373fb1cd7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -95,9 +95,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs) int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, int msix_vec_count) { - int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap = NULL, *query_cap = NULL, *cap; int num_vf_msix, min_msix, max_msix; - void *hca_cap, *cap; int ret; num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); @@ -116,11 +117,20 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, if (msix_vec_count > max_msix) return -EOVERFLOW; - hca_cap = kzalloc(sz, GFP_KERNEL); - if (!hca_cap) - return -ENOMEM; + query_cap = kzalloc(query_sz, GFP_KERNEL); + hca_cap = kzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto out; + } + + ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap); + if (ret) + goto out; cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count); MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); @@ -130,7 +140,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, MLX5_SET(set_hca_cap_in, hca_cap, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +out: kfree(hca_cap); + kfree(query_cap); return ret; } -- cgit v1.2.3 From 6ff51ab8aa8fcbcddeeefce8ca705b575805d12b Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Wed, 31 Mar 2021 10:09:02 +0300 Subject: net/mlx5: Set term table as an unmanaged flow table Termination tables are restricted to have the default miss action and cannot be set to forward to another table in case of a miss. If the fs prio of the termination table is not the last one in the list, fs_core will attempt to attach it to another table. Set the unmanaged ft flag when creating the termination table ft and select the tc offload prio for it to prevent fs_core from selecting the forwarding to next ft miss action and use the default one. In addition, set the flow that forwards to the termination table to ignore ft level restrictions since the ft level is not set by fs_core for unamanged fts. Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink") Signed-off-by: Ariel Levkovich --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index d61bee2d35fe..b45954905845 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -76,10 +76,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED | MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft_attr.prio = FDB_SLOW_PATH; + ft_attr.prio = FDB_TC_OFFLOAD; ft_attr.max_fte = 1; + ft_attr.level = 1; ft_attr.autogroup.max_num_groups = 1; tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { @@ -217,6 +218,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, int i; if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH || !mlx5_eswitch_offload_is_uplink_port(esw, spec)) return false; @@ -289,6 +291,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, /* create the FTE */ flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_act->pkt_reformat = NULL; + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); if (IS_ERR(rule)) goto revert_changes; -- cgit v1.2.3 From e63052a5dd3ce7979bff727a8f4bb6d6b3d1317b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 5 May 2021 13:20:26 -0700 Subject: mlx5e: add add missing BH locking around napi_schdule() It's not correct to call napi_schedule() in pure process context. Because we use __raise_softirq_irqoff() we require callers to be in a context which will eventually lead to softirq handling (hardirq, bh disabled, etc.). With code as is users will see: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #08!!! Fixes: a8dd7ac12fc3 ("net/mlx5e: Generalize RQ activation") Signed-off-by: Jakub Kicinski Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d1b9a4040d60..ad0f69480b9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -889,10 +889,13 @@ err_free_rq: void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - if (rq->icosq) + if (rq->icosq) { mlx5e_trigger_irq(rq->icosq); - else + } else { + local_bh_disable(); napi_schedule(rq->cq.napi); + local_bh_enable(); + } } void mlx5e_deactivate_rq(struct mlx5e_rq *rq) -- cgit v1.2.3 From 9453d45ecb6c2199d72e73c993e9d98677a2801b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 25 May 2021 16:21:52 +0300 Subject: net: zero-initialize tc skb extension on allocation Function skb_ext_add() doesn't initialize created skb extension with any value and leaves it up to the user. However, since extension of type TC_SKB_EXT originally contained only single value tc_skb_ext->chain its users used to just assign the chain value without setting whole extension memory to zero first. This assumption changed when TC_SKB_EXT extension was extended with additional fields but not all users were updated to initialize the new fields which leads to use of uninitialized memory afterwards. UBSAN log: [ 778.299821] UBSAN: invalid-load in net/openvswitch/flow.c:899:28 [ 778.301495] load of value 107 is not a valid value for type '_Bool' [ 778.303215] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc7+ #2 [ 778.304933] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 778.307901] Call Trace: [ 778.308680] [ 778.309358] dump_stack+0xbb/0x107 [ 778.310307] ubsan_epilogue+0x5/0x40 [ 778.311167] __ubsan_handle_load_invalid_value.cold+0x43/0x48 [ 778.312454] ? memset+0x20/0x40 [ 778.313230] ovs_flow_key_extract.cold+0xf/0x14 [openvswitch] [ 778.314532] ovs_vport_receive+0x19e/0x2e0 [openvswitch] [ 778.315749] ? ovs_vport_find_upcall_portid+0x330/0x330 [openvswitch] [ 778.317188] ? create_prof_cpu_mask+0x20/0x20 [ 778.318220] ? arch_stack_walk+0x82/0xf0 [ 778.319153] ? secondary_startup_64_no_verify+0xb0/0xbb [ 778.320399] ? stack_trace_save+0x91/0xc0 [ 778.321362] ? stack_trace_consume_entry+0x160/0x160 [ 778.322517] ? lock_release+0x52e/0x760 [ 778.323444] netdev_frame_hook+0x323/0x610 [openvswitch] [ 778.324668] ? ovs_netdev_get_vport+0xe0/0xe0 [openvswitch] [ 778.325950] __netif_receive_skb_core+0x771/0x2db0 [ 778.327067] ? lock_downgrade+0x6e0/0x6f0 [ 778.328021] ? lock_acquire+0x565/0x720 [ 778.328940] ? generic_xdp_tx+0x4f0/0x4f0 [ 778.329902] ? inet_gro_receive+0x2a7/0x10a0 [ 778.330914] ? lock_downgrade+0x6f0/0x6f0 [ 778.331867] ? udp4_gro_receive+0x4c4/0x13e0 [ 778.332876] ? lock_release+0x52e/0x760 [ 778.333808] ? dev_gro_receive+0xcc8/0x2380 [ 778.334810] ? lock_downgrade+0x6f0/0x6f0 [ 778.335769] __netif_receive_skb_list_core+0x295/0x820 [ 778.336955] ? process_backlog+0x780/0x780 [ 778.337941] ? mlx5e_rep_tc_netdevice_event_unregister+0x20/0x20 [mlx5_core] [ 778.339613] ? seqcount_lockdep_reader_access.constprop.0+0xa7/0xc0 [ 778.341033] ? kvm_clock_get_cycles+0x14/0x20 [ 778.342072] netif_receive_skb_list_internal+0x5f5/0xcb0 [ 778.343288] ? __kasan_kmalloc+0x7a/0x90 [ 778.344234] ? mlx5e_handle_rx_cqe_mpwrq+0x9e0/0x9e0 [mlx5_core] [ 778.345676] ? mlx5e_xmit_xdp_frame_mpwqe+0x14d0/0x14d0 [mlx5_core] [ 778.347140] ? __netif_receive_skb_list_core+0x820/0x820 [ 778.348351] ? mlx5e_post_rx_mpwqes+0xa6/0x25d0 [mlx5_core] [ 778.349688] ? napi_gro_flush+0x26c/0x3c0 [ 778.350641] napi_complete_done+0x188/0x6b0 [ 778.351627] mlx5e_napi_poll+0x373/0x1b80 [mlx5_core] [ 778.352853] __napi_poll+0x9f/0x510 [ 778.353704] ? mlx5_flow_namespace_set_mode+0x260/0x260 [mlx5_core] [ 778.355158] net_rx_action+0x34c/0xa40 [ 778.356060] ? napi_threaded_poll+0x3d0/0x3d0 [ 778.357083] ? sched_clock_cpu+0x18/0x190 [ 778.358041] ? __common_interrupt+0x8e/0x1a0 [ 778.359045] __do_softirq+0x1ce/0x984 [ 778.359938] __irq_exit_rcu+0x137/0x1d0 [ 778.360865] irq_exit_rcu+0xa/0x20 [ 778.361708] common_interrupt+0x80/0xa0 [ 778.362640] [ 778.363212] asm_common_interrupt+0x1e/0x40 [ 778.364204] RIP: 0010:native_safe_halt+0xe/0x10 [ 778.365273] Code: 4f ff ff ff 4c 89 e7 e8 50 3f 40 fe e9 dc fe ff ff 48 89 df e8 43 3f 40 fe eb 90 cc e9 07 00 00 00 0f 00 2d 74 05 62 00 fb f4 90 e9 07 00 00 00 0f 00 2d 64 05 62 00 f4 c3 cc cc 0f 1f 44 00 [ 778.369355] RSP: 0018:ffffffff84407e48 EFLAGS: 00000246 [ 778.370570] RAX: ffff88842de46a80 RBX: ffffffff84425840 RCX: ffffffff83418468 [ 778.372143] RDX: 000000000026f1da RSI: 0000000000000004 RDI: ffffffff8343af5e [ 778.373722] RBP: fffffbfff0884b08 R08: 0000000000000000 R09: ffff88842de46bcb [ 778.375292] R10: ffffed1085bc8d79 R11: 0000000000000001 R12: 0000000000000000 [ 778.376860] R13: ffffffff851124a0 R14: 0000000000000000 R15: dffffc0000000000 [ 778.378491] ? rcu_eqs_enter.constprop.0+0xb8/0xe0 [ 778.379606] ? default_idle_call+0x5e/0xe0 [ 778.380578] default_idle+0xa/0x10 [ 778.381406] default_idle_call+0x96/0xe0 [ 778.382350] do_idle+0x3d4/0x550 [ 778.383153] ? arch_cpu_idle_exit+0x40/0x40 [ 778.384143] cpu_startup_entry+0x19/0x20 [ 778.385078] start_kernel+0x3c7/0x3e5 [ 778.385978] secondary_startup_64_no_verify+0xb0/0xbb Fix the issue by providing new function tc_skb_ext_alloc() that allocates tc skb extension and initializes its memory to 0 before returning it to the caller. Change all existing users to use new API instead of calling skb_ext_add() directly. Fixes: 038ebb1a713d ("net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct") Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct") Signed-off-by: Vlad Buslov Acked-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- include/net/pkt_cls.h | 11 +++++++++++ net/sched/cls_api.c | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 6cdc52d50a48..311382261840 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -626,7 +626,7 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, struct mlx5_eswitch *esw; u32 zone_restore_id; - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + tc_skb_ext = tc_skb_ext_alloc(skb); if (!tc_skb_ext) { WARN_ON(1); return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index bccdb43a880b..2c776e7a7692 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5090,7 +5090,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { chain = mapped_obj.chain; - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + tc_skb_ext = tc_skb_ext_alloc(skb); if (WARN_ON(!tc_skb_ext)) return false; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 255e4f4b521f..ec7823921bd2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -709,6 +709,17 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, cls_common->extack = extack; } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb) +{ + struct tc_skb_ext *tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + + if (tc_skb_ext) + memset(tc_skb_ext, 0, sizeof(*tc_skb_ext)); + return tc_skb_ext; +} +#endif + enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 40fbea626dfd..279f9e2a2319 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1624,7 +1624,7 @@ int tcf_classify_ingress(struct sk_buff *skb, /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { - ext = skb_ext_add(skb, TC_SKB_EXT); + ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; -- cgit v1.2.3