summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c984
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c307
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h175
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c499
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c1653
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c457
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c2
24 files changed, 4081 insertions, 249 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 5749557749b0..a16297e7e2ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -44,6 +44,11 @@ struct mlx5e_l2_rule {
#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE)
+struct mlx5e_promisc_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *rule;
+};
+
struct mlx5e_vlan_table {
struct mlx5e_flow_table ft;
DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
@@ -53,6 +58,7 @@ struct mlx5e_vlan_table {
struct mlx5_flow_handle *untagged_rule;
struct mlx5_flow_handle *any_cvlan_rule;
struct mlx5_flow_handle *any_svlan_rule;
+ struct mlx5_flow_handle *trap_rule;
bool cvlan_filter_disabled;
};
@@ -62,7 +68,7 @@ struct mlx5e_l2_table {
struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE];
struct mlx5e_l2_rule broadcast;
struct mlx5e_l2_rule allmulti;
- struct mlx5e_l2_rule promisc;
+ struct mlx5_flow_handle *trap_rule;
bool broadcast_enabled;
bool allmulti_enabled;
bool promisc_enabled;
@@ -126,7 +132,8 @@ struct mlx5e_ttc_table {
/* NIC prio FTS */
enum {
- MLX5E_VLAN_FT_LEVEL = 0,
+ MLX5E_PROMISC_FT_LEVEL,
+ MLX5E_VLAN_FT_LEVEL,
MLX5E_L2_FT_LEVEL,
MLX5E_TTC_FT_LEVEL,
MLX5E_INNER_TTC_FT_LEVEL,
@@ -241,6 +248,7 @@ struct mlx5e_flow_steering {
struct mlx5e_ethtool_steering ethtool;
#endif
struct mlx5e_tc_table tc;
+ struct mlx5e_promisc_table promisc;
struct mlx5e_vlan_table vlan;
struct mlx5e_l2_table l2;
struct mlx5e_ttc_table ttc;
@@ -288,6 +296,10 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt);
+int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
+void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
+int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
+void mlx5e_remove_mac_trap(struct mlx5e_priv *priv);
#endif /* __MLX5E_FLOW_STEER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 718f8c0a4f6b..84e501e057b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -273,7 +273,7 @@ int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key
err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
if (err)
- return err;
+ goto free_page;
cmd = mlx5_rsc_dump_cmd_create(mdev, key);
if (IS_ERR(cmd)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 43271a3856ca..36381a2ed5a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -179,7 +179,7 @@ int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params)
stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params);
if (stop_room >= sq_size) {
- netdev_err(priv->netdev, "Stop room %hu is bigger than the SQ size %zu\n",
+ netdev_err(priv->netdev, "Stop room %u is bigger than the SQ size %zu\n",
stop_room, sq_size);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 807147d97a0f..ea2cfb04b31a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
struct mlx5e_rq_param *param);
void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
struct mlx5e_sq_param *param);
+void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 2a2bac30daaa..d57b6f06382f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -3,7 +3,6 @@
#include "en/ptp.h"
#include "en/txrx.h"
-#include "lib/clock.h"
struct mlx5e_skb_cb_hwtstamp {
ktime_t cqe_hwtstamp;
@@ -70,6 +69,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
int budget)
{
struct sk_buff *skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
ktime_t hwtstamp;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
@@ -77,7 +77,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
goto out;
}
- hwtstamp = mlx5_timecounter_cyc2time(ptpsq->txqsq.clock, get_cqe_ts(cqe));
+ hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
hwtstamp, ptpsq->cq_stats);
ptpsq->cq_stats->cqe++;
@@ -183,6 +183,9 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix,
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
sq->stop_room = param->stop_room;
+ sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ?
+ mlx5_real_time_cyc2time :
+ mlx5_timecounter_cyc2time;
node = dev_to_node(mlx5_core_dma_dev(mdev));
@@ -261,7 +264,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn,
csp.min_inline_mode = txqsq->min_inline_mode;
csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
- err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn);
+ err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
if (err)
goto err_free_txqsq;
@@ -428,16 +431,13 @@ static int mlx5e_ptp_open_queues(struct mlx5e_port_ptp *c,
if (err)
return err;
- napi_enable(&c->napi);
-
err = mlx5e_ptp_open_txqsqs(c, cparams);
if (err)
- goto disable_napi;
+ goto close_cqs;
return 0;
-disable_napi:
- napi_disable(&c->napi);
+close_cqs:
mlx5e_ptp_close_cqs(c);
return err;
@@ -446,7 +446,6 @@ disable_napi:
static void mlx5e_ptp_close_queues(struct mlx5e_port_ptp *c)
{
mlx5e_ptp_close_txqsqs(c);
- napi_disable(&c->napi);
mlx5e_ptp_close_cqs(c);
}
@@ -515,6 +514,8 @@ void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c)
{
int tc;
+ napi_enable(&c->napi);
+
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
}
@@ -525,4 +526,6 @@ void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c)
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq);
+
+ napi_disable(&c->napi);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
new file mode 100644
index 000000000000..12d7ad061237
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -0,0 +1,984 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "en.h"
+#include "params.h"
+#include "../qos.h"
+
+#define BYTES_IN_MBIT 125000
+
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+ return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev));
+}
+
+int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv)
+{
+ int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev));
+
+ return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1;
+}
+
+/* Software representation of the QoS tree (internal to this file) */
+
+static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv)
+{
+ int size = mlx5e_qos_max_leaf_nodes(priv->mdev);
+ int res;
+
+ WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
+ res = find_first_zero_bit(priv->htb.qos_used_qids, size);
+
+ return res == size ? -ENOSPC : res;
+}
+
+struct mlx5e_qos_node {
+ struct hlist_node hnode;
+ struct rcu_head rcu;
+ struct mlx5e_qos_node *parent;
+ u64 rate;
+ u32 bw_share;
+ u32 max_average_bw;
+ u32 hw_id;
+ u32 classid; /* 16-bit, except root. */
+ u16 qid;
+};
+
+#define MLX5E_QOS_QID_INNER 0xffff
+#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
+
+static struct mlx5e_qos_node *
+mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid,
+ struct mlx5e_qos_node *parent)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->parent = parent;
+
+ node->qid = qid;
+ __set_bit(qid, priv->htb.qos_used_qids);
+
+ node->classid = classid;
+ hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid);
+
+ mlx5e_update_tx_netdev_queues(priv);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->qid = MLX5E_QOS_QID_INNER;
+ node->classid = MLX5E_HTB_CLASSID_ROOT;
+ hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
+{
+ hash_del_rcu(&node->hnode);
+ if (node->qid != MLX5E_QOS_QID_INNER) {
+ __clear_bit(node->qid, priv->htb.qos_used_qids);
+ mlx5e_update_tx_netdev_queues(priv);
+ }
+ kfree_rcu(node, rcu);
+}
+
+/* TX datapath API */
+
+static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
+{
+ /* These channel params are safe to access from the datapath, because:
+ * 1. This function is called only after checking priv->htb.maj_id != 0,
+ * and the number of queues can't change while HTB offload is active.
+ * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for
+ * mlx5e_select_queue to finish while holding priv->state_lock,
+ * preventing other code from changing the number of queues.
+ */
+ bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS);
+
+ return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid;
+}
+
+int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid)
+{
+ struct mlx5e_qos_node *node;
+ u16 qid;
+ int res;
+
+ rcu_read_lock();
+
+ node = mlx5e_sw_node_find_rcu(priv, classid);
+ if (!node) {
+ res = -ENOENT;
+ goto out;
+ }
+ qid = READ_ONCE(node->qid);
+ if (qid == MLX5E_QOS_QID_INNER) {
+ res = -EINVAL;
+ goto out;
+ }
+ res = mlx5e_qid_from_qos(&priv->channels, qid);
+
+out:
+ rcu_read_unlock();
+ return res;
+}
+
+static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_channel *c;
+ int ix;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ return mlx5e_state_dereference(priv, qos_sqs[qid]);
+}
+
+/* SQ lifecycle */
+
+static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ struct mlx5e_qos_node *node)
+{
+ struct mlx5e_create_cq_param ccp = {};
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_sq_param param_sq;
+ struct mlx5e_cq_param param_cq;
+ int txq_ix, ix, qid, err = 0;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+
+ params = &chs->params;
+
+ txq_ix = mlx5e_qid_from_qos(chs, node->qid);
+
+ WARN_ON(node->qid > priv->htb.max_qos_sqs);
+ if (node->qid == priv->htb.max_qos_sqs) {
+ struct mlx5e_sq_stats *stats, **stats_list = NULL;
+
+ if (priv->htb.max_qos_sqs == 0) {
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list),
+ GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+ }
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats) {
+ kvfree(stats_list);
+ return -ENOMEM;
+ }
+ if (stats_list)
+ WRITE_ONCE(priv->htb.qos_sq_stats, stats_list);
+ WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats);
+ /* Order max_qos_sqs increment after writing the array pointer.
+ * Pairs with smp_load_acquire in en_stats.c.
+ */
+ smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1);
+ }
+
+ ix = node->qid % params->num_channels;
+ qid = node->qid / params->num_channels;
+ c = chs->c[ix];
+
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = kzalloc(sizeof(*sq), GFP_KERNEL);
+
+ if (!sq)
+ return -ENOMEM;
+
+ mlx5e_build_create_cq_param(&ccp, c);
+
+ memset(&param_sq, 0, sizeof(param_sq));
+ memset(&param_cq, 0, sizeof(param_cq));
+ mlx5e_build_sq_param(priv, params, &param_sq);
+ mlx5e_build_tx_cq_param(priv, params, &param_cq);
+ err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
+ if (err)
+ goto err_free_sq;
+ err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params,
+ &param_sq, sq, 0, node->hw_id, node->qid);
+ if (err)
+ goto err_close_cq;
+
+ rcu_assign_pointer(qos_sqs[qid], sq);
+
+ return 0;
+
+err_close_cq:
+ mlx5e_close_cq(&sq->cq);
+err_free_sq:
+ kfree(sq);
+ return err;
+}
+
+static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
+{
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_get_qos_sq(priv, node->qid);
+
+ WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq);
+
+ /* Make the change to txq2sq visible before the queue is started.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
+
+ qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid);
+ mlx5e_activate_txqsq(sq);
+}
+
+static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_get_qos_sq(priv, qid);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ /* The queue is disabled, no synchronization with datapath is needed. */
+ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+}
+
+static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ struct mlx5e_params *params;
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+ int ix;
+
+ params = &priv->channels.params;
+
+ ix = qid % params->num_channels;
+ qid /= params->num_channels;
+ c = priv->channels.c[ix];
+ qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+ sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock));
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ return;
+
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+}
+
+void mlx5e_qos_close_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock));
+ if (!qos_sqs)
+ return;
+ synchronize_rcu(); /* Sync with NAPI. */
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ mlx5e_close_txqsq(sq);
+ mlx5e_close_cq(&sq->cq);
+ kfree(sq);
+ }
+
+ kvfree(qos_sqs);
+}
+
+static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_close_queues(chs->c[i]);
+}
+
+static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ u16 qos_sqs_size;
+ int i;
+
+ qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num);
+
+ for (i = 0; i < chs->num; i++) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL);
+ if (!sqs)
+ goto err_free;
+
+ WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size);
+ smp_wmb(); /* Pairs with mlx5e_napi_poll. */
+ rcu_assign_pointer(chs->c[i]->qos_sqs, sqs);
+ }
+
+ return 0;
+
+err_free:
+ while (--i >= 0) {
+ struct mlx5e_txqsq **sqs;
+
+ sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL,
+ lockdep_is_held(&priv->state_lock));
+
+ synchronize_rcu(); /* Sync with NAPI. */
+ kvfree(sqs);
+ }
+ return -ENOMEM;
+}
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt, err;
+
+ if (!priv->htb.maj_id)
+ return 0;
+
+ err = mlx5e_qos_alloc_queues(priv, chs);
+ if (err)
+ return err;
+
+ hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
+ if (node->qid == MLX5E_QOS_QID_INNER)
+ continue;
+ err = mlx5e_open_qos_sq(priv, chs, node);
+ if (err) {
+ mlx5e_qos_close_all_queues(chs);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt;
+
+ hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
+ if (node->qid == MLX5E_QOS_QID_INNER)
+ continue;
+ mlx5e_activate_qos_sq(priv, node);
+ }
+}
+
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
+{
+ struct mlx5e_params *params = &c->priv->channels.params;
+ struct mlx5e_txqsq __rcu **qos_sqs;
+ int i;
+
+ qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs);
+ if (!qos_sqs)
+ return;
+
+ for (i = 0; i < c->qos_sqs_size; i++) {
+ u16 qid = params->num_channels * i + c->ix;
+ struct mlx5e_txqsq *sq;
+
+ sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+ if (!sq) /* Handle the case when the SQ failed to open. */
+ continue;
+
+ qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid);
+ mlx5e_deactivate_txqsq(sq);
+
+ /* The queue is disabled, no synchronization with datapath is needed. */
+ c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL;
+ }
+}
+
+static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_qos_deactivate_queues(chs->c[i]);
+}
+
+/* HTB API */
+
+int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *root;
+ bool opened;
+ int err;
+
+ qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
+
+ if (!mlx5_qos_is_supported(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
+ }
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ err = mlx5e_qos_alloc_queues(priv, &priv->channels);
+ if (err)
+ return err;
+ }
+
+ root = mlx5e_sw_node_create_root(priv);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto err_free_queues;
+ }
+
+ err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
+ goto err_sw_node_delete;
+ }
+
+ WRITE_ONCE(priv->htb.defcls, htb_defcls);
+ /* Order maj_id after defcls - pairs with
+ * mlx5e_select_queue/mlx5e_select_htb_queues.
+ */
+ smp_store_release(&priv->htb.maj_id, htb_maj_id);
+
+ return 0;
+
+err_sw_node_delete:
+ mlx5e_sw_node_delete(priv, root);
+
+err_free_queues:
+ if (opened)
+ mlx5e_qos_close_all_queues(&priv->channels);
+ return err;
+}
+
+int mlx5e_htb_root_del(struct mlx5e_priv *priv)
+{
+ struct mlx5e_qos_node *root;
+ int err;
+
+ qos_dbg(priv->mdev, "TC_HTB_DESTROY\n");
+
+ WRITE_ONCE(priv->htb.maj_id, 0);
+ synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */
+
+ root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT);
+ if (!root) {
+ qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n");
+ return -ENOENT;
+ }
+ err = mlx5_qos_destroy_node(priv->mdev, root->hw_id);
+ if (err)
+ qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n",
+ root->hw_id, err);
+ mlx5e_sw_node_delete(priv, root);
+
+ mlx5e_qos_deactivate_all_queues(&priv->channels);
+ mlx5e_qos_close_all_queues(&priv->channels);
+
+ return err;
+}
+
+static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate,
+ struct mlx5e_qos_node *parent, u32 *bw_share)
+{
+ u64 share = 0;
+
+ while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
+ parent = parent->parent;
+
+ if (parent->max_average_bw)
+ share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
+ parent->max_average_bw);
+ else
+ share = 101;
+
+ *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
+
+ qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
+ rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
+
+ return 0;
+}
+
+static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
+{
+ *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
+
+ qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
+}
+
+int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ int qid;
+ int err;
+
+ qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
+ classid, parent_classid, rate, ceil);
+
+ qid = mlx5e_find_unused_qos_qid(priv);
+ if (qid < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached.");
+ return qid;
+ }
+
+ parent = mlx5e_sw_node_find(priv, parent_classid);
+ if (!parent)
+ return -EINVAL;
+
+ node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share);
+ mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ mlx5e_sw_node_delete(priv, node);
+ return err;
+ }
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node);
+ }
+ }
+
+ return mlx5e_qid_from_qos(&priv->channels, node->qid);
+}
+
+int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *child;
+ int err, tmp_err;
+ u32 new_hw_id;
+ u16 qid;
+
+ qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
+ classid, child_classid, rate, ceil);
+
+ node = mlx5e_sw_node_find(priv, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
+ qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ /* Intentionally reuse the qid for the upcoming first child. */
+ child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto err_destroy_hw_node;
+ }
+
+ child->rate = rate;
+ mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share);
+ mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share,
+ child->max_average_bw, &child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ goto err_delete_sw_node;
+ }
+
+ /* No fail point. */
+
+ qid = node->qid;
+ /* Pairs with mlx5e_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, child);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, child);
+ }
+ }
+
+ return 0;
+
+err_delete_sw_node:
+ child->qid = MLX5E_QOS_QID_INNER;
+ mlx5e_sw_node_delete(priv, child);
+
+err_destroy_hw_node:
+ tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id);
+ if (tmp_err) /* Not fatal. */
+ qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
+ new_hw_id, classid, tmp_err);
+ return err;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt;
+
+ hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode)
+ if (node->qid == qid)
+ break;
+
+ return node;
+}
+
+static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
+{
+ qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid);
+ netdev_tx_reset_queue(txq);
+ netif_tx_start_queue(txq);
+}
+
+static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
+{
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid);
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+ if (!qdisc)
+ return;
+
+ spin_lock_bh(qdisc_lock(qdisc));
+ qdisc_reset(qdisc);
+ spin_unlock_bh(qdisc_lock(qdisc));
+}
+
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
+ u16 *new_qid, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node;
+ struct netdev_queue *txq;
+ u16 qid, moved_qid;
+ bool opened;
+ int err;
+
+ qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid);
+
+ *old_qid = *new_qid = 0;
+
+ node = mlx5e_sw_node_find(priv, classid);
+ if (!node)
+ return -ENOENT;
+
+ /* Store qid for reuse. */
+ qid = node->qid;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ txq = netdev_get_tx_queue(priv->netdev,
+ mlx5e_qid_from_qos(&priv->channels, qid));
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ mlx5e_sw_node_delete(priv, node);
+
+ moved_qid = mlx5e_qos_cur_leaf_nodes(priv);
+
+ if (moved_qid == 0) {
+ /* The last QoS SQ was just destroyed. */
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+ moved_qid--;
+
+ if (moved_qid < qid) {
+ /* The highest QoS SQ was just destroyed. */
+ WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u",
+ qid, moved_qid);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+
+ WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
+ qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
+
+ node = mlx5e_sw_node_find_by_qid(priv, moved_qid);
+ WARN(!node, "Could not find a node with qid %u to move to queue %u",
+ moved_qid, qid);
+
+ /* Stop traffic to the old queue. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+ __clear_bit(moved_qid, priv->htb.qos_used_qids);
+
+ if (opened) {
+ txq = netdev_get_tx_queue(priv->netdev,
+ mlx5e_qid_from_qos(&priv->channels, moved_qid));
+ mlx5e_deactivate_qos_sq(priv, moved_qid);
+ mlx5e_close_qos_sq(priv, moved_qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(priv->netdev, moved_qid);
+
+ __set_bit(qid, priv->htb.qos_used_qids);
+ WRITE_ONCE(node->qid, qid);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
+ node->classid, moved_qid, qid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node);
+ }
+ }
+
+ mlx5e_update_tx_netdev_queues(priv);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
+
+ *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid);
+ *new_qid = mlx5e_qid_from_qos(&priv->channels, qid);
+ return 0;
+}
+
+int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ u32 old_hw_id, new_hw_id;
+ int err, saved_err = 0;
+ u16 qid;
+
+ qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
+ force ? "_FORCE" : "", classid);
+
+ node = mlx5e_sw_node_find(priv, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id,
+ node->parent->bw_share,
+ node->parent->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ if (!force)
+ return err;
+ saved_err = err;
+ }
+
+ /* Store qid for reuse and prevent clearing the bit. */
+ qid = node->qid;
+ /* Pairs with mlx5e_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(priv->netdev, qid);
+
+ err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ parent = node->parent;
+ mlx5e_sw_node_delete(priv, node);
+
+ node = parent;
+ WRITE_ONCE(node->qid, qid);
+
+ /* Early return on error in force mode. Parent will still be an inner
+ * node to be deleted by a following delete operation.
+ */
+ if (saved_err)
+ return saved_err;
+
+ old_hw_id = node->hw_id;
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node);
+ }
+ }
+
+ err = mlx5_qos_destroy_node(priv->mdev, old_hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ return 0;
+}
+
+static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *child;
+ int err = 0;
+ int bkt;
+
+ hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) {
+ u32 old_bw_share = child->bw_share;
+ int err_one;
+
+ if (child->parent != node)
+ continue;
+
+ mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share);
+ if (child->bw_share == old_bw_share)
+ continue;
+
+ err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share,
+ child->max_average_bw, child->hw_id);
+ if (!err && err_one) {
+ err = err_one;
+
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
+ qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n",
+ node->classid, err);
+ }
+ }
+
+ return err;
+}
+
+int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ u32 bw_share, max_average_bw;
+ struct mlx5e_qos_node *node;
+ bool ceil_changed = false;
+ int err;
+
+ qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
+ classid, rate, ceil);
+
+ node = mlx5e_sw_node_find(priv, classid);
+ if (!node)
+ return -ENOENT;
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share);
+ mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw);
+
+ err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share,
+ max_average_bw, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
+ qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ if (max_average_bw != node->max_average_bw)
+ ceil_changed = true;
+
+ node->bw_share = bw_share;
+ node->max_average_bw = max_average_bw;
+
+ if (ceil_changed)
+ err = mlx5e_qos_update_children(priv, node, extack);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
new file mode 100644
index 000000000000..5af7991fcd19
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_QOS_H
+#define __MLX5E_EN_QOS_H
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5E_QOS_MAX_LEAF_NODES 256
+
+struct mlx5e_priv;
+struct mlx5e_channels;
+struct mlx5e_channel;
+
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv);
+
+/* TX datapath API */
+int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid);
+struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid);
+
+/* SQ lifecycle */
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
+void mlx5e_qos_close_queues(struct mlx5e_channel *c);
+
+/* HTB API */
+int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
+ struct netlink_ext_ack *extack);
+int mlx5e_htb_root_del(struct mlx5e_priv *priv);
+int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
+ u16 *new_qid, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
+ struct netlink_ext_ack *extack);
+int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
index 58e27038c947..be0ee03de721 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -129,10 +129,10 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
work);
struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
struct neighbour *n = update_work->n;
+ bool neigh_connected, same_dev;
struct mlx5e_encap_entry *e;
unsigned char ha[ETH_ALEN];
struct mlx5e_priv *priv;
- bool neigh_connected;
u8 nud_state, dead;
rtnl_lock();
@@ -146,12 +146,16 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
memcpy(ha, n->ha, ETH_ALEN);
nud_state = n->nud_state;
dead = n->dead;
+ same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
read_unlock_bh(&n->lock);
neigh_connected = (nud_state & NUD_VALID) && !dead;
trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+ if (!same_dev)
+ goto out;
+
list_for_each_entry(e, &nhe->encap_list, encap_list) {
if (!mlx5e_encap_take(e))
continue;
@@ -160,6 +164,7 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e);
}
+out:
rtnl_unlock();
mlx5e_release_neigh_update_work(update_work);
}
@@ -175,7 +180,6 @@ static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv
if (WARN_ON(!update_work))
return NULL;
- m_neigh.dev = n->dev;
m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
@@ -246,7 +250,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
rcu_read_lock();
list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
neigh_list) {
- if (p->dev == nhe->m_neigh.dev) {
+ if (p->dev == READ_ONCE(nhe->neigh_dev)) {
found = true;
break;
}
@@ -279,7 +283,7 @@ int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
if (err)
- return err;
+ goto out_err;
INIT_LIST_HEAD(&neigh_update->neigh_list);
mutex_init(&neigh_update->encap_lock);
@@ -287,14 +291,19 @@ int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
mlx5e_rep_neigh_stats_work);
mlx5e_rep_neigh_update_init_interval(rpriv);
- rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
- err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+ neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&neigh_update->netevent_nb);
if (err)
- goto out_err;
+ goto out_notifier;
return 0;
-out_err:
+out_notifier:
+ neigh_update->netevent_nb.notifier_call = NULL;
rhashtable_destroy(&neigh_update->neigh_ht);
+out_err:
+ netdev_warn(rpriv->netdev,
+ "Failed to initialize neighbours handling for vport %d\n",
+ rpriv->rep->vport);
return err;
}
@@ -303,6 +312,9 @@ void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ if (!rpriv->neigh_update.netevent_nb.notifier_call)
+ return;
+
unregister_netevent_notifier(&neigh_update->netevent_nb);
flush_workqueue(priv->wq); /* flush neigh update works */
@@ -361,7 +373,8 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
}
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
struct mlx5e_neigh_hash_entry **nhe)
{
int err;
@@ -371,10 +384,11 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
return -ENOMEM;
(*nhe)->priv = priv;
- memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1);
+ WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
index 32b239189c95..6fe0ab970943 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -16,7 +16,8 @@ struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh);
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
struct mlx5e_neigh_hash_entry **nhe);
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 76177f7c5ec2..065126370acd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -26,7 +26,9 @@ struct mlx5e_rep_indr_block_priv {
};
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
@@ -39,9 +41,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
return err;
mutex_lock(&rpriv->neigh_update.encap_lock);
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
if (!nhe) {
- err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
if (err) {
mutex_unlock(&rpriv->neigh_update.encap_lock);
mlx5_tun_entropy_refcount_dec(tun_entropy,
@@ -122,7 +124,7 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
}
unlock:
mutex_unlock(&esw->offloads.encap_tbl_lock);
- mlx5e_put_encap_flow_list(priv, &flow_list);
+ mlx5e_put_flow_list(priv, &flow_list);
}
static int
@@ -651,7 +653,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
tc_skb_ext->chain = chain;
- zone_restore_id = reg_c1 & ZONE_RESTORE_MAX;
+ zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
@@ -660,7 +662,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
return false;
}
- tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG);
+ tunnel_id = reg_c1 >> ESW_TUN_OFFSET;
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
#endif /* CONFIG_NET_TC_SKB_EXT */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
index fdf9702c2d7d..d0661578467b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -27,7 +27,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
unsigned char ha[ETH_ALEN]);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e);
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev);
void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 072363e73f1c..f3f6eb081948 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -12,6 +12,7 @@
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
+#include <linux/refcount.h>
#include <linux/xarray.h>
#include "lib/fs_chains.h"
@@ -27,6 +28,7 @@
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
+#define MLX5_CT_STATE_REPLY_BIT BIT(4)
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
@@ -51,11 +53,11 @@ struct mlx5_tc_ct_priv {
struct mlx5_flow_table *ct_nat;
struct mlx5_flow_table *post_ct;
struct mutex control_lock; /* guards parallel adds/dels */
- struct mutex shared_counter_lock;
struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping;
enum mlx5_flow_namespace_type ns_type;
struct mlx5_fs_chains *chains;
+ spinlock_t ht_lock; /* protects ft entries */
};
struct mlx5_ct_flow {
@@ -124,6 +126,10 @@ struct mlx5_ct_counter {
bool is_shared;
};
+enum {
+ MLX5_CT_ENTRY_FLAG_VALID,
+};
+
struct mlx5_ct_entry {
struct rhash_head node;
struct rhash_head tuple_node;
@@ -134,6 +140,12 @@ struct mlx5_ct_entry {
struct mlx5_ct_tuple tuple;
struct mlx5_ct_tuple tuple_nat;
struct mlx5_ct_zone_rule zone_rules[2];
+
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct work_struct work;
+
+ refcount_t refcnt;
+ unsigned long flags;
};
static const struct rhashtable_params cts_ht_params = {
@@ -167,6 +179,12 @@ static const struct rhashtable_params tuples_nat_ht_params = {
.min_size = 16 * 1024,
};
+static bool
+mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
+{
+ return !!(entry->tuple_nat_node.next);
+}
+
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
@@ -635,6 +653,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
}
ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
+ ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
ct_state,
meta->ct_metadata.mark,
@@ -703,11 +722,11 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
attr->outer_match_level = MLX5_MATCH_L4;
attr->counter = entry->counter->counter;
attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->in_mdev = priv->mdev;
mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
- mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
- entry->tuple.zone & MLX5_CT_ZONE_MASK,
- MLX5_CT_ZONE_MASK);
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
if (IS_ERR(zone_rule->rule)) {
@@ -734,6 +753,87 @@ err_attr:
return err;
}
+static bool
+mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
+{
+ return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+}
+
+static struct mlx5_ct_entry *
+mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
+{
+ struct mlx5_ct_entry *entry;
+
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
+ tuples_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt)) {
+ return entry;
+ } else if (!entry) {
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
+ tuple, tuples_nat_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt))
+ return entry;
+ }
+
+ return entry ? ERR_PTR(-EINVAL) : NULL;
+}
+
+static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+ tuples_ht_params);
+}
+
+static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ kfree(entry);
+}
+
+static void
+mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
+{
+ struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void
+__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ struct mlx5e_priv *priv;
+
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ priv = netdev_priv(entry->ct_priv->netdev);
+ INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
+ queue_work(priv->wq, &entry->work);
+}
+
static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
@@ -764,7 +864,6 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_counter *shared_counter;
struct mlx5_ct_entry *rev_entry;
__be16 tmp_port;
- int ret;
/* get the reversed tuple */
tmp_port = rev_tuple.port.src;
@@ -786,23 +885,31 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
}
/* Use the same counter as the reverse direction */
- mutex_lock(&ct_priv->shared_counter_lock);
- rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
- tuples_ht_params);
- if (rev_entry) {
- if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
- mutex_unlock(&ct_priv->shared_counter_lock);
- return rev_entry->counter;
- }
+ spin_lock_bh(&ct_priv->ht_lock);
+ rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
+
+ if (IS_ERR(rev_entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ goto create_counter;
}
- mutex_unlock(&ct_priv->shared_counter_lock);
- shared_counter = mlx5_tc_ct_counter_create(ct_priv);
- if (IS_ERR(shared_counter)) {
- ret = PTR_ERR(shared_counter);
- return ERR_PTR(ret);
+ if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
+ ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
+ shared_counter = rev_entry->counter;
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(rev_entry);
+ return shared_counter;
}
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+create_counter:
+
+ shared_counter = mlx5_tc_ct_counter_create(ct_priv);
+ if (IS_ERR(shared_counter))
+ return shared_counter;
+
shared_counter->is_shared = true;
refcount_set(&shared_counter->refcount, 1);
return shared_counter;
@@ -860,10 +967,14 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (!meta_action)
return -EOPNOTSUPP;
- entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
- cts_ht_params);
- if (entry)
- return 0;
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (entry && refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_put(entry);
+ return -EEXIST;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
@@ -872,6 +983,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
entry->tuple.zone = ft->zone;
entry->cookie = flow->cookie;
entry->restore_cookie = meta_action->ct_metadata.cookie;
+ refcount_set(&entry->refcnt, 2);
+ entry->ct_priv = ct_priv;
err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
if (err)
@@ -882,84 +995,85 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (err)
goto err_set;
- err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
- &entry->tuple_node,
- tuples_ht_params);
+ spin_lock_bh(&ct_priv->ht_lock);
+
+ err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
+ cts_ht_params);
+ if (err)
+ goto err_entries;
+
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
if (err)
goto err_tuple;
if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
- err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
- &entry->tuple_nat_node,
- tuples_nat_ht_params);
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
if (err)
goto err_tuple_nat;
}
+ spin_unlock_bh(&ct_priv->ht_lock);
err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
ft->zone_restore_id);
if (err)
goto err_rules;
- err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
- cts_ht_params);
- if (err)
- goto err_insert;
+ set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+ mlx5_tc_ct_entry_put(entry); /* this function reference */
return 0;
-err_insert:
- mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
- rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
- &entry->tuple_nat_node, tuples_nat_ht_params);
+ spin_lock_bh(&ct_priv->ht_lock);
+ if (mlx5_tc_ct_entry_has_nat(entry))
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
- if (entry->tuple_node.next)
- rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
- &entry->tuple_node,
- tuples_ht_params);
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
err_tuple:
+ rhashtable_remove_fast(&ft->ct_entries_ht,
+ &entry->node,
+ cts_ht_params);
+err_entries:
+ spin_unlock_bh(&ct_priv->ht_lock);
err_set:
kfree(entry);
- netdev_warn(ct_priv->netdev,
- "Failed to offload ct entry, err: %d\n", err);
+ if (err != -EEXIST)
+ netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
return err;
}
-static void
-mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_ct_entry *entry)
-{
- mlx5_tc_ct_entry_del_rules(ct_priv, entry);
- mutex_lock(&ct_priv->shared_counter_lock);
- if (entry->tuple_node.next)
- rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
- &entry->tuple_nat_node,
- tuples_nat_ht_params);
- rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
- tuples_ht_params);
- mutex_unlock(&ct_priv->shared_counter_lock);
- mlx5_tc_ct_counter_put(ct_priv, entry);
-
-}
-
static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
struct flow_cls_offload *flow)
{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = flow->cookie;
struct mlx5_ct_entry *entry;
- entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
- cts_ht_params);
- if (!entry)
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT;
+ }
- mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
- WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
- &entry->node,
- cts_ht_params));
- kfree(entry);
+ if (!mlx5_tc_ct_entry_valid(entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(entry);
return 0;
}
@@ -968,19 +1082,30 @@ static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
struct flow_cls_offload *f)
{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = f->cookie;
struct mlx5_ct_entry *entry;
u64 lastuse, packets, bytes;
- entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
- cts_ht_params);
- if (!entry)
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
FLOW_ACTION_HW_STATS_DELAYED);
+ mlx5_tc_ct_entry_put(entry);
return 0;
}
@@ -1082,8 +1207,8 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
struct netlink_ext_ack *extack)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ bool trk, est, untrk, unest, new, rpl, unrpl;
struct flow_dissector_key_ct *mask, *key;
- bool trk, est, untrk, unest, new;
u32 ctstate = 0, ctstate_mask = 0;
u16 ct_state_on, ct_state_off;
u16 ct_state, ct_state_mask;
@@ -1109,9 +1234,10 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
- TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
+ TCA_FLOWER_KEY_CT_FLAGS_NEW |
+ TCA_FLOWER_KEY_CT_FLAGS_REPLY)) {
NL_SET_ERR_MSG_MOD(extack,
- "only ct_state trk, est and new are supported for offload");
+ "only ct_state trk, est, new and rpl are supported for offload");
return -EOPNOTSUPP;
}
@@ -1120,13 +1246,17 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
if (new) {
NL_SET_ERR_MSG_MOD(extack,
@@ -1241,9 +1371,8 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
pre_ct->flow_rule = rule;
/* add miss rule */
- memset(spec, 0, sizeof(*spec));
dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add pre ct miss rule zone %d", zone);
@@ -1472,11 +1601,9 @@ err_mapping:
static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
- struct mlx5_tc_ct_priv *ct_priv = arg;
struct mlx5_ct_entry *entry = ptr;
- mlx5_tc_ct_del_ft_entry(ct_priv, entry);
- kfree(entry);
+ mlx5_tc_ct_entry_put(entry);
}
static void
@@ -1754,7 +1881,6 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
goto err_set_registers;
}
- dealloc_mod_hdr_actions(mod_acts);
pre_ct_attr->modify_hdr = mod_hdr;
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
@@ -1954,6 +2080,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
goto err_mapping_labels;
}
+ spin_lock_init(&ct_priv->ht_lock);
ct_priv->ns_type = ns_type;
ct_priv->chains = chains;
ct_priv->netdev = priv->netdev;
@@ -1988,7 +2115,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
idr_init(&ct_priv->fte_ids);
mutex_init(&ct_priv->control_lock);
- mutex_init(&ct_priv->shared_counter_lock);
rhashtable_init(&ct_priv->zone_ht, &zone_params);
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
@@ -2031,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
rhashtable_destroy(&ct_priv->zone_ht);
mutex_destroy(&ct_priv->control_lock);
- mutex_destroy(&ct_priv->shared_counter_lock);
idr_destroy(&ct_priv->fte_ids);
kfree(ct_priv);
}
@@ -2053,14 +2178,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
return false;
- entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
- tuples_ht_params);
- if (!entry)
- entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
- &tuple, tuples_nat_ht_params);
- if (!entry)
+ spin_lock(&ct_priv->ht_lock);
+
+ entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
+ if (!entry) {
+ spin_unlock(&ct_priv->ht_lock);
return false;
+ }
+
+ if (IS_ERR(entry)) {
+ spin_unlock(&ct_priv->ht_lock);
+ return false;
+ }
+ spin_unlock(&ct_priv->ht_lock);
tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+ __mlx5_tc_ct_entry_put(entry);
+
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 6503b614337c..69e618d17071 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -73,7 +73,7 @@ struct mlx5_ct_attr {
#define zone_restore_to_reg_ct {\
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\
.moffset = 0,\
- .mlen = 1,\
+ .mlen = (ESW_ZONE_ID_BITS / 8),\
.soffset = MLX5_BYTE_OFF(fte_match_param,\
misc_parameters_2.metadata_reg_c_1) + 3,\
}
@@ -81,14 +81,12 @@ struct mlx5_ct_attr {
#define nic_zone_restore_to_reg_ct {\
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\
.moffset = 2,\
- .mlen = 1,\
+ .mlen = (ESW_ZONE_ID_BITS / 8),\
}
#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen)
#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset)
#define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8)
-#define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8)
-#define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0)
#if IS_ENABLED(CONFIG_MLX5_TC_CT)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
new file mode 100644
index 000000000000..c223591ffc22
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_PRIV_H__
+#define __MLX5_EN_TC_PRIV_H__
+
+#include "en_tc.h"
+
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
+
+#define MLX5E_TC_MAX_SPLITS 1
+
+enum {
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
+ MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
+ MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
+};
+
+struct mlx5e_tc_flow_parse_attr {
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
+ struct mlx5_flow_spec spec;
+ struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct ethhdr eth;
+};
+
+/* Helper struct for accessing a struct containing list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the contining struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+ struct mlx5e_encap_entry *e; /* attached encap instance */
+ struct list_head list;
+ int index;
+};
+
+struct encap_route_flow_item {
+ struct mlx5e_route_entry *r; /* attached route instance */
+ int index;
+};
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ struct mlx5e_priv *priv;
+ u64 cookie;
+ unsigned long flags;
+ struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+ /* flows sharing the same reformat object - currently mpls decap */
+ struct list_head l3_to_l2_reformat;
+ struct mlx5e_decap_entry *decap_reformat;
+
+ /* flows sharing same route entry */
+ struct list_head decap_routes;
+ struct mlx5e_route_entry *decap_route;
+ struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS];
+
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
+ struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
+ struct list_head unready; /* flows not ready to be offloaded (e.g
+ * due to missing route)
+ */
+ struct net_device *orig_dev; /* netdev adding flow first */
+ int tmp_entry_index;
+ struct list_head tmp_list; /* temporary flow list used by neigh update */
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+ struct completion init_done;
+ int tunnel_id; /* the mapped tunnel id of this flow */
+ struct mlx5_flow_attr *attr;
+};
+
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
+
+struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow);
+
+static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before setting bit. */
+ smp_mb__before_atomic();
+ set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
+ unsigned long flag)
+{
+ /* test_and_set_bit() provides all necessary barriers */
+ return test_and_set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_test_and_set(flow, flag) \
+ __flow_flag_test_and_set(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before clearing bit. */
+ smp_mb__before_atomic();
+ clear_bit(flag, &flow->flags);
+}
+
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ bool ret = test_bit(flag, &flow->flags);
+
+ /* Read fields of flow structure only after checking flags. */
+ smp_mb__after_atomic();
+ return ret;
+}
+
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow);
+struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow);
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow);
+
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow);
+
+#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 90930e54b6f2..f8075a604605 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -6,10 +6,32 @@
#include <net/geneve.h>
#include <net/bareudp.h>
#include "en/tc_tun.h"
+#include "en/tc_priv.h"
#include "en_tc.h"
#include "rep/tc.h"
#include "rep/neigh.h"
+struct mlx5e_tc_tun_route_attr {
+ struct net_device *out_dev;
+ struct net_device *route_dev;
+ union {
+ struct flowi4 fl4;
+ struct flowi6 fl6;
+ } fl;
+ struct neighbour *n;
+ u8 ttl;
+};
+
+#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}
+
+static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
+{
+ if (attr->n)
+ neigh_release(attr->n);
+ if (attr->route_dev)
+ dev_put(attr->route_dev);
+}
+
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
if (netif_is_vxlan(tunnel_dev))
@@ -79,12 +101,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi4 *fl4,
- struct neighbour **out_n,
- u8 *out_ttl)
+ struct mlx5e_tc_tun_route_attr *attr)
{
+ struct net_device *route_dev;
+ struct net_device *out_dev;
struct neighbour *n;
struct rtable *rt;
@@ -97,46 +117,50 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = mdev->priv.eswitch;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- fl4->flowi4_oif = uplink_dev->ifindex;
+ attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
}
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+ rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
- ip_rt_put(rt);
- return -ENETUNREACH;
+ ret = -ENETUNREACH;
+ goto err_rt_release;
}
#else
return -EOPNOTSUPP;
#endif
- ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
- if (ret < 0) {
- ip_rt_put(rt);
- return ret;
- }
- dev_hold(*route_dev);
+ ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_rt_release;
+ dev_hold(route_dev);
- if (!(*out_ttl))
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
- ip_rt_put(rt);
+ if (!attr->ttl)
+ attr->ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
if (!n) {
- dev_put(*route_dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_dev_release;
}
- *out_n = n;
+ ip_rt_put(rt);
+ attr->route_dev = route_dev;
+ attr->out_dev = out_dev;
+ attr->n = n;
return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_rt_release:
+ ip_rt_put(rt);
+ return ret;
}
-static void mlx5e_route_lookup_ipv4_put(struct net_device *route_dev,
- struct neighbour *n)
+static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
{
- neigh_release(n);
- dev_put(route_dev);
+ mlx5e_tc_tun_route_attr_cleanup(attr);
}
static const char *mlx5e_netdev_kind(struct net_device *dev)
@@ -188,28 +212,26 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
- struct net_device *out_dev, *route_dev;
- struct flowi4 fl4 = {};
- struct neighbour *n;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
int ipv4_encap_size;
char *encap_header;
- u8 nud_state, ttl;
struct iphdr *ip;
+ u8 nud_state;
int err;
/* add the IP fields */
- fl4.flowi4_tos = tun_key->tos;
- fl4.daddr = tun_key->u.ipv4.dst;
- fl4.saddr = tun_key->u.ipv4.src;
- ttl = tun_key->ttl;
+ attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
- err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &out_dev, &route_dev,
- &fl4, &n, &ttl);
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
if (err)
return err;
ipv4_encap_size =
- (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct iphdr) +
e->tunnel->calc_hlen(e);
@@ -226,40 +248,36 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
goto release_neigh;
}
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
- e->route_dev_ifindex = route_dev->ifindex;
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
/* It's important to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
* neigh changes it's validity state, we would find the relevant neigh
* in the hash.
*/
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err)
goto free_encap;
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
/* add ethernet header */
- ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
ETH_P_IP);
/* add ip header */
ip->tos = tun_key->tos;
ip->version = 0x4;
ip->ihl = 0x5;
- ip->ttl = ttl;
- ip->daddr = fl4.daddr;
- ip->saddr = fl4.saddr;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
@@ -271,7 +289,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
+ neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
@@ -287,8 +305,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
}
e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- mlx5e_route_lookup_ipv4_put(route_dev, n);
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
return err;
destroy_neigh_entry:
@@ -296,55 +314,155 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- mlx5e_route_lookup_ipv4_put(route_dev, n);
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+}
+
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ int ipv4_encap_size;
+ char *encap_header;
+ struct iphdr *ip;
+ u8 nud_state;
+ int err;
+
+ /* add the IP fields */
+ attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv4_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv4_put(&attr);
return err;
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi6 *fl6,
- struct neighbour **out_n,
- u8 *out_ttl)
+ struct mlx5e_tc_tun_route_attr *attr)
{
+ struct net_device *route_dev;
+ struct net_device *out_dev;
struct dst_entry *dst;
struct neighbour *n;
-
int ret;
- dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6,
NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
- if (!(*out_ttl))
- *out_ttl = ip6_dst_hoplimit(dst);
+ if (!attr->ttl)
+ attr->ttl = ip6_dst_hoplimit(dst);
- ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
- if (ret < 0) {
- dst_release(dst);
- return ret;
- }
+ ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_dst_release;
- dev_hold(*route_dev);
- n = dst_neigh_lookup(dst, &fl6->daddr);
- dst_release(dst);
+ dev_hold(route_dev);
+ n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
if (!n) {
- dev_put(*route_dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_dev_release;
}
- *out_n = n;
+ dst_release(dst);
+ attr->out_dev = out_dev;
+ attr->route_dev = route_dev;
+ attr->n = n;
return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_dst_release:
+ dst_release(dst);
+ return ret;
}
-static void mlx5e_route_lookup_ipv6_put(struct net_device *route_dev,
- struct neighbour *n)
+static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
{
- neigh_release(n);
- dev_put(route_dev);
+ mlx5e_tc_tun_route_attr_cleanup(attr);
}
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
@@ -353,28 +471,25 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
- struct net_device *out_dev, *route_dev;
- struct flowi6 fl6 = {};
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
struct ipv6hdr *ip6h;
- struct neighbour *n = NULL;
int ipv6_encap_size;
char *encap_header;
- u8 nud_state, ttl;
+ u8 nud_state;
int err;
- ttl = tun_key->ttl;
+ attr.ttl = tun_key->ttl;
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
- fl6.daddr = tun_key->u.ipv6.dst;
- fl6.saddr = tun_key->u.ipv6.src;
-
- err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &out_dev, &route_dev,
- &fl6, &n, &ttl);
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
if (err)
return err;
ipv6_encap_size =
- (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct ipv6hdr) +
e->tunnel->calc_hlen(e);
@@ -391,39 +506,35 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
goto release_neigh;
}
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
- e->route_dev_ifindex = route_dev->ifindex;
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
/* It's importent to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
* neigh changes it's validity state, we would find the relevant neigh
* in the hash.
*/
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err)
goto free_encap;
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
/* add ethernet header */
- ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
ETH_P_IPV6);
/* add ip header */
ip6_flow_hdr(ip6h, tun_key->tos, 0);
/* the HW fills up ipv6 payload len */
- ip6h->hop_limit = ttl;
- ip6h->daddr = fl6.daddr;
- ip6h->saddr = fl6.saddr;
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
@@ -435,7 +546,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
+ neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
@@ -452,8 +563,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
}
e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- mlx5e_route_lookup_ipv6_put(route_dev, n);
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
return err;
destroy_neigh_entry:
@@ -461,10 +572,160 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- mlx5e_route_lookup_ipv6_put(route_dev, n);
+ mlx5e_route_lookup_ipv6_put(&attr);
return err;
}
+
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state;
+ int err;
+
+ attr.ttl = tun_key->ttl;
+
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv6_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+}
+#endif
+
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *flow_attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ u16 vport_num;
+ int err = 0;
+
+ if (flow_attr->ip_version == 4) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
+ attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
+ err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr);
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->ip_version == 6) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
+ attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
+ err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr);
+ }
#endif
+ else
+ return 0;
+
+ if (err)
+ return err;
+
+ if (attr.route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
+ misc_parameters.vxlan_vni);
+ esw_attr->rx_tun_attr->decap_vport = vport_num;
+
+out:
+ if (flow_attr->ip_version == 4)
+ mlx5e_route_lookup_ipv4_put(&attr);
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->ip_version == 6)
+ mlx5e_route_lookup_ipv6_put(&attr);
+#endif
+ return err;
+}
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev)
@@ -625,14 +886,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
}
}
- /* Enforce DMAC when offloading incoming tunneled flows.
- * Flow counters require a match on the DMAC.
- */
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16), priv->netdev->dev_addr);
-
/* let software handle IP fragments */
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 704359df6095..67de2bf36861 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -11,6 +11,8 @@
#include "en.h"
#include "en_rep.h"
+#ifdef CONFIG_MLX5_ESWITCH
+
enum {
MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
MLX5E_TC_TUNNEL_TYPE_VXLAN,
@@ -59,17 +61,30 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
#else
static inline int
mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; }
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
#endif
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
@@ -86,4 +101,6 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
void *headers_c,
void *headers_v);
+#endif /* CONFIG_MLX5_ESWITCH */
+
#endif //__MLX5_EN_TC_TUNNEL_H__
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
new file mode 100644
index 000000000000..6a116335bb21
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -0,0 +1,1653 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <net/fib_notifier.h>
+#include "tc_tun_encap.h"
+#include "en_tc.h"
+#include "tc_tun.h"
+#include "rep/tc.h"
+#include "diag/en_tc_tracepoint.h"
+
+enum {
+ MLX5E_ROUTE_ENTRY_VALID = BIT(0),
+};
+
+struct mlx5e_route_key {
+ int ip_version;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } endpoint_ip;
+};
+
+struct mlx5e_route_entry {
+ struct mlx5e_route_key key;
+ struct list_head encap_entries;
+ struct list_head decap_flows;
+ u32 flags;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ int tunnel_dev_index;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_tc_tun_encap {
+ struct mlx5e_priv *priv;
+ struct notifier_block fib_nb;
+ spinlock_t route_lock; /* protects route_tbl */
+ unsigned long route_tbl_last_update;
+ DECLARE_HASHTABLE(route_tbl, 8);
+};
+
+static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
+{
+ return r->flags & MLX5E_ROUTE_ENTRY_VALID;
+}
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_rx_tun_attr *tun_attr;
+ void *daddr, *saddr;
+ u8 ip_version;
+
+ tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
+ if (!tun_attr)
+ return -ENOMEM;
+
+ esw_attr->rx_tun_attr = tun_attr;
+ ip_version = mlx5e_tc_get_ip_version(spec, true);
+
+ if (ip_version == 4) {
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ tun_attr->dst_ip.v4 = *(__be32 *)daddr;
+ tun_attr->src_ip.v4 = *(__be32 *)saddr;
+ if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
+ return 0;
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (ip_version == 6) {
+ int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+ struct in6_addr zerov6 = {};
+
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
+ memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
+ if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
+ !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
+ return 0;
+ }
+#endif
+ /* Only set the flag if both src and dst ip addresses exist. They are
+ * required to establish routing.
+ */
+ flow_flag_set(flow, TUN_RX);
+ return 0;
+}
+
+static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
+{
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+
+ return all_flow_encaps_valid;
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
+ return;
+
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ e->encap_size, e->encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+ PTR_ERR(e->pkt_reformat));
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
+
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+
+ /* Do not offload flows with unresolved neighbors */
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ continue;
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ flow->rule[0] = rule;
+ /* was unset when slow path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
+
+ /* update from encap rule to slow path rule */
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+ flow->rule[0] = rule;
+ /* was unset when fast path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+
+ /* we know that the encap is valid */
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+}
+
+static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
+ struct list_head *flow_list,
+ int index)
+{
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ return;
+ wait_for_completion(&flow->init_done);
+
+ flow->tmp_entry_index = index;
+ list_add(&flow->tmp_list, flow_list);
+}
+
+/* Takes reference to all flows attached to encap and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
+{
+ struct encap_flow_item *efi;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ mlx5e_take_tmp_flow(flow, flow_list, efi->index);
+ }
+}
+
+/* Takes reference to all flows attached to route and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
+ struct list_head *flow_list)
+{
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, &r->decap_flows, decap_routes)
+ mlx5e_take_tmp_flow(flow, flow_list, 0);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_encap_entry *next = NULL;
+
+retry:
+ rcu_read_lock();
+
+ /* find encap with non-zero reference counter value */
+ for (next = e ?
+ list_next_or_null_rcu(&nhe->encap_list,
+ &e->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list) :
+ list_first_or_null_rcu(&nhe->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list);
+ next;
+ next = list_next_or_null_rcu(&nhe->encap_list,
+ &next->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list))
+ if (mlx5e_encap_take(next))
+ break;
+
+ rcu_read_unlock();
+
+ /* release starting encap */
+ if (e)
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
+ if (!next)
+ return next;
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+ if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ e = next;
+ goto retry;
+ }
+
+ return next;
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_encap_entry *e = NULL;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = ipv6_stub->nd_tbl;
+#endif
+ else
+ return;
+
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
+ * next one.
+ */
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
+ struct encap_flow_item *efi, *tmp;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+ list_add(&flow->tmp_list, &flow_list);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ counter = mlx5e_tc_get_counter(flow);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ if (neigh_used) {
+ /* release current encap before breaking the loop */
+ mlx5e_encap_put(priv, e);
+ break;
+ }
+ }
+
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ WARN_ON(!list_empty(&e->flows));
+
+ if (e->compl_result > 0) {
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ }
+
+ kfree(e->tun_info);
+ kfree(e->encap_header);
+ kfree_rcu(e, rcu);
+}
+
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index);
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow, int out_index)
+{
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (flow->attr->esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5e_detach_encap_route(priv, flow, out_index);
+
+ /* flow wasn't fully initialized */
+ if (!e)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->encaps[out_index].list);
+ flow->encaps[out_index].e = NULL;
+ if (!refcount_dec_and_test(&e->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+struct encap_key {
+ const struct ip_tunnel_key *ip_tun_key;
+ struct mlx5e_tc_tunnel *tc_tunnel;
+};
+
+static int cmp_encap_info(struct encap_key *a,
+ struct encap_key *b)
+{
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
+ a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
+}
+
+static int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
+static int hash_encap_info(struct encap_key *key)
+{
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tc_tunnel->tunnel_type);
+}
+
+static int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
+
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e;
+ struct encap_key e_key;
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
+ if (!cmp_encap_info(&e_key, key) &&
+ mlx5e_encap_take(e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < out_index; i++) {
+ if (flow->encaps[i].e != e)
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ attr->dest_chain = 0;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
+ vport_num);
+ err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err >= 0) {
+ esw_attr->dests[out_index].src_port_rewrite_act_id = err;
+ err = 0;
+ }
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ int act_id = attr->dests[out_index].src_port_rewrite_act_id;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
+ vport_num);
+ mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ unsigned int ret;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ spin_lock_bh(&encap->route_lock);
+ ret = encap->route_tbl_last_update;
+ spin_unlock_bh(&encap->route_lock);
+ return ret;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev,
+ bool *encap_valid)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ const struct ip_tunnel_info *tun_info;
+ unsigned long tbl_time_before = 0;
+ struct encap_key key;
+ struct mlx5e_encap_entry *e;
+ bool entry_created = false;
+ unsigned short family;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+ key.ip_tun_key = &tun_info->key;
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+ if (!key.tc_tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(&key);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ e = mlx5e_encap_get(priv, &key, hash_key);
+
+ /* must verify if encap is valid or not */
+ if (e) {
+ /* Check that entry was not already attached to this flow */
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ wait_for_completion(&e->res_ready);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (e->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
+ goto attach_flow;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ refcount_set(&e->refcnt, 1);
+ init_completion(&e->res_ready);
+ entry_created = true;
+ INIT_LIST_HEAD(&e->route_list);
+
+ tun_info = mlx5e_dup_tun_info(tun_info);
+ if (!tun_info) {
+ err = -ENOMEM;
+ goto out_err_init;
+ }
+ e->tun_info = tun_info;
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err_init;
+
+ INIT_LIST_HEAD(&e->flows);
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ if (family == AF_INET)
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ complete_all(&e->res_ready);
+ if (err) {
+ e->compl_result = err;
+ goto out_err;
+ }
+ e->compl_result = 1;
+
+attach_flow:
+ err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
+ out_index);
+ if (err)
+ goto out_err;
+
+ flow->encaps[out_index].e = e;
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ *encap_valid = true;
+ } else {
+ *encap_valid = false;
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (e)
+ mlx5e_encap_put(priv, e);
+ return err;
+
+out_err_init:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ kfree(tun_info);
+ kfree(e);
+ return err;
+}
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = flow->attr->parse_attr;
+ if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = parse_attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
+ sizeof(parse_attr->eth),
+ &parse_attr->eth,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}
+
+static int cmp_route_info(struct mlx5e_route_key *a,
+ struct mlx5e_route_key *b)
+{
+ if (a->ip_version == 4 && b->ip_version == 4)
+ return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
+ sizeof(a->endpoint_ip.v4));
+ else if (a->ip_version == 6 && b->ip_version == 6)
+ return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
+ sizeof(a->endpoint_ip.v6));
+ return 1;
+}
+
+static u32 hash_route_info(struct mlx5e_route_key *key)
+{
+ if (key->ip_version == 4)
+ return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
+ return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
+}
+
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r)
+{
+ WARN_ON(!list_empty(&r->decap_flows));
+ WARN_ON(!list_empty(&r->encap_entries));
+
+ kfree_rcu(r, rcu);
+}
+
+static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
+ if (!refcount_dec_and_test(&r->refcnt))
+ return;
+ hash_del_rcu(&r->hlist);
+ mlx5e_route_dealloc(priv, r);
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_route_key r_key;
+ struct mlx5e_route_entry *r;
+
+ hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
+ r_key = r->key;
+ if (!cmp_route_info(&r_key, key) &&
+ refcount_inc_not_zero(&r->refcnt))
+ return r;
+ }
+ return NULL;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get_create(struct mlx5e_priv *priv,
+ struct mlx5e_route_key *key,
+ int tunnel_dev_index,
+ unsigned long *route_tbl_change_time)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ struct mlx5e_route_entry *r;
+ u32 hash_key;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ hash_key = hash_route_info(key);
+ spin_lock_bh(&encap->route_lock);
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+ if (r) {
+ if (!mlx5e_route_entry_valid(r)) {
+ mlx5e_route_put_locked(priv, r);
+ return ERR_PTR(-EINVAL);
+ }
+ return r;
+ }
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ r->key = *key;
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+ r->tunnel_dev_index = tunnel_dev_index;
+ refcount_set(&r->refcnt, 1);
+ INIT_LIST_HEAD(&r->decap_flows);
+ INIT_LIST_HEAD(&r->encap_entries);
+
+ spin_lock_bh(&encap->route_lock);
+ *route_tbl_change_time = encap->route_tbl_last_update;
+ hash_add(encap->route_tbl, &r->hlist, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
+{
+ u32 hash_key = hash_route_info(key);
+ struct mlx5e_route_entry *r;
+
+ spin_lock_bh(&encap->route_lock);
+ encap->route_tbl_last_update = jiffies;
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+struct mlx5e_tc_fib_event_data {
+ struct work_struct work;
+ unsigned long event;
+ struct mlx5e_route_entry *r;
+ struct net_device *ul_dev;
+};
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work);
+static struct mlx5e_tc_fib_event_data *
+mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+
+ fib_work = kzalloc(sizeof(*fib_work), flags);
+ if (WARN_ON(!fib_work))
+ return NULL;
+
+ INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
+ fib_work->event = event;
+ fib_work->ul_dev = ul_dev;
+
+ return fib_work;
+}
+
+static int
+mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ unsigned long event)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct net_device *ul_dev;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ ul_dev = uplink_rpriv->netdev;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
+ if (!fib_work)
+ return -ENOMEM;
+
+ dev_hold(ul_dev);
+ refcount_inc(&r->refcnt);
+ fib_work->r = r;
+ queue_work(priv->wq, &fib_work->work);
+
+ return 0;
+}
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_before, tbl_time_after;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (!esw_attr->rx_tun_attr)
+ goto out;
+
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ tbl_time_after = tbl_time_before;
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
+ if (err || !esw_attr->rx_tun_attr->decap_vport)
+ goto out;
+
+ key.ip_version = attr->ip_version;
+ if (key.ip_version == 4)
+ key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
+ else
+ key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
+ &tbl_time_after);
+ if (IS_ERR(r)) {
+ err = PTR_ERR(r);
+ goto out;
+ }
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ goto out;
+ }
+ }
+
+ flow->decap_route = r;
+ list_add(&flow->decap_routes, &r->decap_flows);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return 0;
+
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return err;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_after = tbl_time_before;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ unsigned short family;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+
+ if (family == AF_INET) {
+ key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
+ key.ip_version = 4;
+ } else if (family == AF_INET6) {
+ key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
+ key.ip_version = 6;
+ }
+
+ err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+ e->route_dev_ifindex, out_index);
+ if (err || !(esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+ return err;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
+ &tbl_time_after);
+ if (IS_ERR(r))
+ return PTR_ERR(r);
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ return err;
+ }
+ }
+
+ flow->encap_routes[out_index].r = r;
+ if (new_encap_entry)
+ list_add(&e->route_list, &r->encap_entries);
+ flow->encap_routes[out_index].index = out_index;
+ return 0;
+}
+
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_entry *r = flow->decap_route;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->decap_routes);
+ flow->decap_route = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index)
+{
+ struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e, *tmp;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ flow->encap_routes[out_index].r = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
+ list_del_init(&e->route_list);
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ esw_attr = attr->esw_attr;
+
+ if (flow_flag_test(flow, SLOW))
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ attr->modify_hdr = NULL;
+
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
+ }
+}
+
+static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
+ struct net_device *tunnel_dev,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
+ mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
+ mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
+ e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ spec = &parse_attr->spec;
+
+ err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
+ e->out_dev, e->route_dev_ifindex,
+ flow->tmp_entry_index);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
+ continue;
+ }
+
+ err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
+ err);
+ continue;
+ }
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ goto offload_to_slow_path;
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ } else {
+offload_to_slow_path:
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ }
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ struct mlx5e_encap_entry *e;
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ list_for_each_entry(e, &r->encap_entries, route_list) {
+ LIST_HEAD(encap_flows);
+
+ mlx5e_take_all_encap_flows(e, &encap_flows);
+ if (list_empty(&encap_flows))
+ continue;
+
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_invalidate_encap(priv, e, &encap_flows);
+
+ if (!replace) {
+ list_splice(&encap_flows, flow_list);
+ continue;
+ }
+
+ mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
+ list_splice(&encap_flows, flow_list);
+ }
+
+ return 0;
+}
+
+static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, flow_list, tmp_list)
+ if (mlx5e_is_offloaded_flow(flow))
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+}
+
+static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
+ struct list_head *decap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, decap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ parse_attr = attr->parse_attr;
+ spec = &parse_attr->spec;
+ err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
+ err);
+ continue;
+ }
+
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ flow_flag_set(flow, OFFLOADED);
+ }
+ }
+}
+
+static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ LIST_HEAD(decap_flows);
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ mlx5e_take_all_route_decap_flows(r, &decap_flows);
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_unoffload_flow_list(priv, &decap_flows);
+ if (replace)
+ mlx5e_reoffload_decap(priv, &decap_flows);
+
+ list_splice(&decap_flows, flow_list);
+
+ return 0;
+}
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work)
+{
+ struct mlx5e_tc_fib_event_data *event_data =
+ container_of(work, struct mlx5e_tc_fib_event_data, work);
+ struct net_device *ul_dev = event_data->ul_dev;
+ struct mlx5e_priv *priv = netdev_priv(ul_dev);
+ struct mlx5e_route_entry *r = event_data->r;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+ bool replace;
+ int err;
+
+ /* sync with concurrent neigh updates */
+ rtnl_lock();
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
+
+ if (!mlx5e_route_entry_valid(r) && !replace)
+ goto out;
+
+ err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
+ err);
+
+ err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
+ err);
+
+ if (replace)
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ rtnl_unlock();
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ mlx5e_route_put(priv, event_data->r);
+ dev_put(event_data->ul_dev);
+ kfree(event_data);
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib_entry_notifier_info, info);
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->dst_len != 32)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ key.endpoint_ip.v4 = htonl(fen_info->dst);
+ key.ip_version = 4;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib6_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib6_entry_notifier_info, info);
+ fib_dev = fib6_info_nh_dev(fen_info->rt);
+ if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->rt->fib6_dst.plen != 128)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
+ sizeof(fen_info->rt->fib6_dst.addr));
+ key.ip_version = 6;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct fib_notifier_info *info = ptr;
+ struct mlx5e_tc_tun_encap *encap;
+ struct net_device *ul_dev;
+ struct mlx5e_priv *priv;
+
+ encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
+ priv = encap->priv;
+ ul_dev = priv->netdev;
+ priv = netdev_priv(ul_dev);
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ if (info->family == AF_INET)
+ fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
+ else if (info->family == AF_INET6)
+ fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
+ else
+ return NOTIFY_DONE;
+
+ if (!IS_ERR_OR_NULL(fib_work)) {
+ queue_work(priv->wq, &fib_work->work);
+ } else if (IS_ERR(fib_work)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
+ mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
+ PTR_ERR(fib_work));
+ }
+
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_DONE;
+}
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_tun_encap *encap;
+ int err;
+
+ encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
+ if (!encap)
+ return ERR_PTR(-ENOMEM);
+
+ encap->priv = priv;
+ encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
+ spin_lock_init(&encap->route_lock);
+ hash_init(encap->route_tbl);
+ err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
+ NULL, NULL);
+ if (err) {
+ kvfree(encap);
+ return ERR_PTR(err);
+ }
+
+ return encap;
+}
+
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
+{
+ if (!encap)
+ return;
+
+ unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
+ flush_workqueue(encap->priv->wq); /* flush fib event works */
+ kvfree(encap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
new file mode 100644
index 000000000000..3391504d9a08
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUN_ENCAP_H__
+#define __MLX5_EN_TC_TUN_ENCAP_H__
+
+#include "tc_priv.h"
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow, int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev,
+ bool *encap_valid);
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv);
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap);
+
+#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
index 1f9526244222..3479672e84cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -81,8 +81,8 @@ static int parse_tunnel(struct mlx5e_priv *priv,
if (!enc_keyid.mask->keyid)
return 0;
- if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
- MLX5_FLEX_PROTO_CW_MPLS_UDP))
+ if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
+ !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
return -EOPNOTSUPP;
flow_rule_match_mpls(rule, &match);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
new file mode 100644
index 000000000000..37fc1d77ded7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies */
+
+#include <net/page_pool.h>
+#include "en/txrx.h"
+#include "en/params.h"
+#include "en/trap.h"
+
+static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_trap *trap_ctx = container_of(napi, struct mlx5e_trap, napi);
+ struct mlx5e_ch_stats *ch_stats = trap_ctx->stats;
+ struct mlx5e_rq *rq = &trap_ctx->rq;
+ bool busy = false;
+ int work_done = 0;
+
+ ch_stats->poll++;
+
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ busy |= work_done == budget;
+ busy |= rq->post_wqes(rq);
+
+ if (busy)
+ return budget;
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ return work_done;
+
+ mlx5e_cq_arm(&rq->cq);
+ return work_done;
+}
+
+static int mlx5e_alloc_trap_rq(struct mlx5e_priv *priv, struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq_stats *stats, struct mlx5e_params *params,
+ struct mlx5e_ch_stats *ch_stats,
+ struct mlx5e_rq *rq)
+{
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct page_pool_params pp_params = {};
+ int node = dev_to_node(mdev->device);
+ u32 pool_size;
+ int wq_sz;
+ int err;
+ int i;
+
+ rqp->wq.db_numa_node = node;
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = mdev->device;
+ rq->netdev = priv->netdev;
+ rq->mdev = mdev;
+ rq->priv = priv;
+ rq->stats = stats;
+ rq->clock = &mdev->clock;
+ rq->tstamp = &priv->tstamp;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+
+ rq->buff.map_dir = DMA_FROM_DEVICE;
+ rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, NULL);
+ pool_size = 1 << params->log_rq_mtu_frames;
+
+ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl);
+ if (err)
+ return err;
+
+ rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
+
+ wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
+
+ rq->wqe.info = rqp->frags_info;
+ rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
+ rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
+ (wq_sz << rq->wqe.info.log_num_frags)),
+ GFP_KERNEL, node);
+ if (!rq->wqe.frags) {
+ err = -ENOMEM;
+ goto err_wq_cyc_destroy;
+ }
+
+ err = mlx5e_init_di_list(rq, wq_sz, node);
+ if (err)
+ goto err_free_frags;
+
+ rq->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+
+ mlx5e_rq_set_trap_handlers(rq, params);
+
+ /* Create a page_pool and register it with rxq */
+ pp_params.order = 0;
+ pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.pool_size = pool_size;
+ pp_params.nid = node;
+ pp_params.dev = mdev->device;
+ pp_params.dma_dir = rq->buff.map_dir;
+
+ /* page_pool can be used even when there is no rq->xdp_prog,
+ * given page_pool does not handle DMA mapping there is no
+ * required state to clear. And page_pool gracefully handle
+ * elevated refcnt.
+ */
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ err = PTR_ERR(rq->page_pool);
+ rq->page_pool = NULL;
+ goto err_free_di_list;
+ }
+ for (i = 0; i < wq_sz; i++) {
+ struct mlx5e_rx_wqe_cyc *wqe =
+ mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
+ int f;
+
+ for (f = 0; f < rq->wqe.info.num_frags; f++) {
+ u32 frag_size = rq->wqe.info.arr[f].frag_size |
+ MLX5_HW_START_PADDING;
+
+ wqe->data[f].byte_count = cpu_to_be32(frag_size);
+ wqe->data[f].lkey = rq->mkey_be;
+ }
+ /* check if num_frags is not a pow of two */
+ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
+ wqe->data[f].byte_count = 0;
+ wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ wqe->data[f].addr = 0;
+ }
+ }
+ return 0;
+
+err_free_di_list:
+ mlx5e_free_di_list(rq);
+err_free_frags:
+ kvfree(rq->wqe.frags);
+err_wq_cyc_destroy:
+ mlx5_wq_destroy(&rq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_trap_rq(struct mlx5e_rq *rq)
+{
+ page_pool_destroy(rq->page_pool);
+ mlx5e_free_di_list(rq);
+ kvfree(rq->wqe.frags);
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
+static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct napi_struct *napi,
+ struct mlx5e_rq_stats *stats, struct mlx5e_params *params,
+ struct mlx5e_rq_param *rq_param,
+ struct mlx5e_ch_stats *ch_stats,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_create_cq_param ccp = {};
+ struct dim_cq_moder trap_moder = {};
+ struct mlx5e_cq *cq = &rq->cq;
+ int err;
+
+ ccp.node = dev_to_node(mdev->device);
+ ccp.ch_stats = ch_stats;
+ ccp.napi = napi;
+ ccp.ix = 0;
+ err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, cq);
+ if (err)
+ return err;
+
+ err = mlx5e_alloc_trap_rq(priv, rq_param, stats, params, ch_stats, rq);
+ if (err)
+ goto err_destroy_cq;
+
+ err = mlx5e_create_rq(rq, rq_param);
+ if (err)
+ goto err_free_rq;
+
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ goto err_destroy_rq;
+
+ return 0;
+
+err_destroy_rq:
+ mlx5e_destroy_rq(rq);
+ mlx5e_free_rx_descs(rq);
+err_free_rq:
+ mlx5e_free_trap_rq(rq);
+err_destroy_cq:
+ mlx5e_close_cq(cq);
+
+ return err;
+}
+
+static void mlx5e_close_trap_rq(struct mlx5e_rq *rq)
+{
+ mlx5e_destroy_rq(rq);
+ mlx5e_free_rx_descs(rq);
+ mlx5e_free_trap_rq(rq);
+ mlx5e_close_cq(&rq->cq);
+}
+
+static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
+ u32 rqn)
+{
+ void *tirc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn);
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, rqn);
+ err = mlx5e_create_tir(mdev, tir, in);
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir)
+{
+ mlx5e_destroy_tir(mdev, tir);
+}
+
+static void mlx5e_activate_trap_rq(struct mlx5e_rq *rq)
+{
+ set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+}
+
+static void mlx5e_deactivate_trap_rq(struct mlx5e_rq *rq)
+{
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+}
+
+static void mlx5e_build_trap_params(struct mlx5e_priv *priv, struct mlx5e_trap *t)
+{
+ struct mlx5e_params *params = &t->params;
+
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ mlx5e_init_rq_type_params(priv->mdev, params);
+ params->sw_mtu = priv->netdev->max_mtu;
+ mlx5e_build_rq_param(priv, params, NULL, &t->rq_param);
+}
+
+static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
+{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0));
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_trap *t;
+ int err;
+
+ t = kvzalloc_node(sizeof(*t), GFP_KERNEL, cpu_to_node(cpu));
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5e_build_trap_params(priv, t);
+
+ t->priv = priv;
+ t->mdev = priv->mdev;
+ t->tstamp = &priv->tstamp;
+ t->pdev = mlx5_core_dma_dev(priv->mdev);
+ t->netdev = priv->netdev;
+ t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+ t->stats = &priv->trap_stats.ch;
+
+ netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
+
+ err = mlx5e_open_trap_rq(priv, &t->napi,
+ &priv->trap_stats.rq,
+ &t->params, &t->rq_param,
+ &priv->trap_stats.ch,
+ &t->rq);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ err = mlx5e_create_trap_direct_rq_tir(t->mdev, &t->tir, t->rq.rqn);
+ if (err)
+ goto err_close_trap_rq;
+
+ return t;
+
+err_close_trap_rq:
+ mlx5e_close_trap_rq(&t->rq);
+err_napi_del:
+ netif_napi_del(&t->napi);
+ kvfree(t);
+ return ERR_PTR(err);
+}
+
+void mlx5e_close_trap(struct mlx5e_trap *trap)
+{
+ mlx5e_destroy_trap_direct_rq_tir(trap->mdev, &trap->tir);
+ mlx5e_close_trap_rq(&trap->rq);
+ netif_napi_del(&trap->napi);
+ kvfree(trap);
+}
+
+static void mlx5e_activate_trap(struct mlx5e_trap *trap)
+{
+ napi_enable(&trap->napi);
+ mlx5e_activate_trap_rq(&trap->rq);
+ napi_schedule(&trap->napi);
+}
+
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap = priv->en_trap;
+
+ mlx5e_deactivate_trap_rq(&trap->rq);
+ napi_disable(&trap->napi);
+}
+
+static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv)
+{
+ struct mlx5e_trap *trap;
+
+ trap = mlx5e_open_trap(priv);
+ if (IS_ERR(trap))
+ goto out;
+
+ mlx5e_activate_trap(trap);
+out:
+ return trap;
+}
+
+static void mlx5e_del_trap_queue(struct mlx5e_priv *priv)
+{
+ mlx5e_deactivate_trap(priv);
+ mlx5e_close_trap(priv->en_trap);
+ priv->en_trap = NULL;
+}
+
+static int mlx5e_trap_get_tirn(struct mlx5e_trap *en_trap)
+{
+ return en_trap->tir.tirn;
+}
+
+static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id)
+{
+ bool open_queue = !priv->en_trap;
+ struct mlx5e_trap *trap;
+ int err;
+
+ if (open_queue) {
+ trap = mlx5e_add_trap_queue(priv);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ priv->en_trap = trap;
+ }
+
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ err = mlx5e_add_vlan_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ err = mlx5e_add_mac_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ if (err)
+ goto err_out;
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ err = -EINVAL;
+ goto err_out;
+ }
+ return 0;
+
+err_out:
+ if (open_queue)
+ mlx5e_del_trap_queue(priv);
+ return err;
+}
+
+static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id)
+{
+ switch (trap_id) {
+ case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
+ mlx5e_remove_vlan_trap(priv);
+ break;
+ case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
+ mlx5e_remove_mac_trap(priv);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
+ return -EINVAL;
+ }
+ if (priv->en_trap && !mlx5_devlink_trap_get_num_active(priv->mdev))
+ mlx5e_del_trap_queue(priv);
+
+ return 0;
+}
+
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx)
+{
+ int err = 0;
+
+ /* Traps are unarmed when interface is down, no need to update
+ * them. The configuration is saved in the core driver,
+ * queried and applied upon interface up operation in
+ * mlx5e_open_locked().
+ */
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ switch (trap_ctx->action) {
+ case DEVLINK_TRAP_ACTION_TRAP:
+ err = mlx5e_handle_action_trap(priv, trap_ctx->id);
+ break;
+ case DEVLINK_TRAP_ACTION_DROP:
+ err = mlx5e_handle_action_drop(priv, trap_ctx->id);
+ break;
+ default:
+ netdev_warn(priv->netdev, "%s: Unsupported action %d\n", __func__,
+ trap_ctx->action);
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static int mlx5e_apply_trap(struct mlx5e_priv *priv, int trap_id, bool enable)
+{
+ enum devlink_trap_action action;
+ int err;
+
+ err = mlx5_devlink_traps_get_action(priv->mdev, trap_id, &action);
+ if (err)
+ return err;
+ if (action == DEVLINK_TRAP_ACTION_TRAP)
+ err = enable ? mlx5e_handle_action_trap(priv, trap_id) :
+ mlx5e_handle_action_drop(priv, trap_id);
+ return err;
+}
+
+static const int mlx5e_traps_arr[] = {
+ DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER,
+};
+
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5e_traps_arr); i++) {
+ err = mlx5e_apply_trap(priv, mlx5e_traps_arr[i], enable);
+ if (err)
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
new file mode 100644
index 000000000000..aa3f17658c6d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies */
+
+#ifndef __MLX5E_TRAP_H__
+#define __MLX5E_TRAP_H__
+
+#include "../en.h"
+#include "../devlink.h"
+
+struct mlx5e_trap {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_tir tir;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+
+ /* data path - accessed per napi poll */
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
+
+ struct mlx5e_params params;
+ struct mlx5e_rq_param rq_param;
+};
+
+void mlx5e_close_trap(struct mlx5e_trap *trap);
+void mlx5e_deactivate_trap(struct mlx5e_priv *priv);
+int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx);
+int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 4880f2179273..2371b83dad9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -26,6 +26,13 @@
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
+static inline
+ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts)
+{
+ return INDIRECT_CALL_2(func, mlx5_real_time_cyc2time, mlx5_timecounter_cyc2time,
+ clock, cqe_ts);
+}
+
enum mlx5e_icosq_wqe_type {
MLX5E_ICOSQ_WQE_NOP,
MLX5E_ICOSQ_WQE_UMR_RX,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index d487e5e37162..8d991c3b7a50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -83,7 +83,7 @@ static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
/* Let other device's napi(s) and XSK wakeups see our new state. */
- synchronize_rcu();
+ synchronize_net();
}
static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index d87c345878d3..f4bce1365639 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -111,7 +111,7 @@ err_free_cparam:
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
- synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */
+ synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);