summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h175
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c499
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c1653
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c1083
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h40
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c517
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c631
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c12
-rw-r--r--include/linux/mlx5/eswitch.h29
25 files changed, 3812 insertions, 1057 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8809dd4de57e..8cb2625472c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -40,6 +40,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \
en_rep.o en/rep/bond.o en/mod_hdr.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
en/mapping.o lib/fs_chains.o en/tc_tun.o \
+ esw/indir_table.o en/tc_tun_encap.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
index 1177860a2ee4..f15718db5d0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
@@ -15,7 +15,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update,
TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha,
bool neigh_connected),
TP_ARGS(nhe, ha, neigh_connected),
- TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+ TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
__array(u8, ha, ETH_ALEN)
__array(u8, v4, 4)
__array(u8, v6, 16)
@@ -25,7 +25,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update,
struct in6_addr *pin6;
__be32 *p32;
- __assign_str(devname, mn->dev->name);
+ __assign_str(devname, nhe->neigh_dev->name);
__entry->neigh_connected = neigh_connected;
memcpy(__entry->ha, ha, ETH_ALEN);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
index d4e6cfaaade3..ac52ef37f38a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
@@ -77,7 +77,7 @@ TRACE_EVENT(mlx5e_stats_flower,
TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used),
TP_ARGS(nhe, neigh_used),
- TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+ TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
__array(u8, v4, 4)
__array(u8, v6, 16)
__field(bool, neigh_used)
@@ -86,7 +86,7 @@ TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
struct in6_addr *pin6;
__be32 *p32;
- __assign_str(devname, mn->dev->name);
+ __assign_str(devname, nhe->neigh_dev->name);
__entry->neigh_used = neigh_used;
p32 = (__be32 *)__entry->v4;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
index 616ee585a985..be0ee03de721 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -129,10 +129,10 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
work);
struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
struct neighbour *n = update_work->n;
+ bool neigh_connected, same_dev;
struct mlx5e_encap_entry *e;
unsigned char ha[ETH_ALEN];
struct mlx5e_priv *priv;
- bool neigh_connected;
u8 nud_state, dead;
rtnl_lock();
@@ -146,12 +146,16 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
memcpy(ha, n->ha, ETH_ALEN);
nud_state = n->nud_state;
dead = n->dead;
+ same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
read_unlock_bh(&n->lock);
neigh_connected = (nud_state & NUD_VALID) && !dead;
trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+ if (!same_dev)
+ goto out;
+
list_for_each_entry(e, &nhe->encap_list, encap_list) {
if (!mlx5e_encap_take(e))
continue;
@@ -160,6 +164,7 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e);
}
+out:
rtnl_unlock();
mlx5e_release_neigh_update_work(update_work);
}
@@ -175,7 +180,6 @@ static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv
if (WARN_ON(!update_work))
return NULL;
- m_neigh.dev = n->dev;
m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
@@ -246,7 +250,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
rcu_read_lock();
list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
neigh_list) {
- if (p->dev == nhe->m_neigh.dev) {
+ if (p->dev == READ_ONCE(nhe->neigh_dev)) {
found = true;
break;
}
@@ -369,7 +373,8 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
}
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
struct mlx5e_neigh_hash_entry **nhe)
{
int err;
@@ -379,10 +384,11 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
return -ENOMEM;
(*nhe)->priv = priv;
- memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1);
+ WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
index 32b239189c95..6fe0ab970943 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -16,7 +16,8 @@ struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh);
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev,
struct mlx5e_neigh_hash_entry **nhe);
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 76177f7c5ec2..065126370acd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -26,7 +26,9 @@ struct mlx5e_rep_indr_block_priv {
};
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
@@ -39,9 +41,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
return err;
mutex_lock(&rpriv->neigh_update.encap_lock);
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
if (!nhe) {
- err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
if (err) {
mutex_unlock(&rpriv->neigh_update.encap_lock);
mlx5_tun_entropy_refcount_dec(tun_entropy,
@@ -122,7 +124,7 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
}
unlock:
mutex_unlock(&esw->offloads.encap_tbl_lock);
- mlx5e_put_encap_flow_list(priv, &flow_list);
+ mlx5e_put_flow_list(priv, &flow_list);
}
static int
@@ -651,7 +653,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
tc_skb_ext->chain = chain;
- zone_restore_id = reg_c1 & ZONE_RESTORE_MAX;
+ zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
@@ -660,7 +662,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
return false;
}
- tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG);
+ tunnel_id = reg_c1 >> ESW_TUN_OFFSET;
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
#endif /* CONFIG_NET_TC_SKB_EXT */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
index fdf9702c2d7d..d0661578467b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -27,7 +27,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
unsigned char ha[ETH_ALEN]);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e);
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh *m_neigh,
+ struct net_device *neigh_dev);
void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 40aaa105b2fc..0b503ebe59ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -711,6 +711,8 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
attr->outer_match_level = MLX5_MATCH_L4;
attr->counter = entry->counter->counter;
attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->in_mdev = priv->mdev;
mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
@@ -1761,7 +1763,6 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
goto err_set_registers;
}
- dealloc_mod_hdr_actions(mod_acts);
pre_ct_attr->modify_hdr = mod_hdr;
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 6503b614337c..69e618d17071 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -73,7 +73,7 @@ struct mlx5_ct_attr {
#define zone_restore_to_reg_ct {\
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\
.moffset = 0,\
- .mlen = 1,\
+ .mlen = (ESW_ZONE_ID_BITS / 8),\
.soffset = MLX5_BYTE_OFF(fte_match_param,\
misc_parameters_2.metadata_reg_c_1) + 3,\
}
@@ -81,14 +81,12 @@ struct mlx5_ct_attr {
#define nic_zone_restore_to_reg_ct {\
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\
.moffset = 2,\
- .mlen = 1,\
+ .mlen = (ESW_ZONE_ID_BITS / 8),\
}
#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen)
#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset)
#define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8)
-#define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8)
-#define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0)
#if IS_ENABLED(CONFIG_MLX5_TC_CT)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
new file mode 100644
index 000000000000..c223591ffc22
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_PRIV_H__
+#define __MLX5_EN_TC_PRIV_H__
+
+#include "en_tc.h"
+
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
+
+#define MLX5E_TC_MAX_SPLITS 1
+
+enum {
+ MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
+ MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
+ MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
+ MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
+ MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
+ MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
+ MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
+ MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
+ MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
+ MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
+ MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
+};
+
+struct mlx5e_tc_flow_parse_attr {
+ const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
+ struct mlx5_flow_spec spec;
+ struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct ethhdr eth;
+};
+
+/* Helper struct for accessing a struct containing list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the contining struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+ struct mlx5e_encap_entry *e; /* attached encap instance */
+ struct list_head list;
+ int index;
+};
+
+struct encap_route_flow_item {
+ struct mlx5e_route_entry *r; /* attached route instance */
+ int index;
+};
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ struct mlx5e_priv *priv;
+ u64 cookie;
+ unsigned long flags;
+ struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+ /* flows sharing the same reformat object - currently mpls decap */
+ struct list_head l3_to_l2_reformat;
+ struct mlx5e_decap_entry *decap_reformat;
+
+ /* flows sharing same route entry */
+ struct list_head decap_routes;
+ struct mlx5e_route_entry *decap_route;
+ struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS];
+
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
+ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
+ struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
+ struct list_head unready; /* flows not ready to be offloaded (e.g
+ * due to missing route)
+ */
+ struct net_device *orig_dev; /* netdev adding flow first */
+ int tmp_entry_index;
+ struct list_head tmp_list; /* temporary flow list used by neigh update */
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+ struct completion init_done;
+ int tunnel_id; /* the mapped tunnel id of this flow */
+ struct mlx5_flow_attr *attr;
+};
+
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
+
+struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow);
+
+static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before setting bit. */
+ smp_mb__before_atomic();
+ set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
+ unsigned long flag)
+{
+ /* test_and_set_bit() provides all necessary barriers */
+ return test_and_set_bit(flag, &flow->flags);
+}
+
+#define flow_flag_test_and_set(flow, flag) \
+ __flow_flag_test_and_set(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ /* Complete all memory stores before clearing bit. */
+ smp_mb__before_atomic();
+ clear_bit(flag, &flow->flags);
+}
+
+#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
+{
+ bool ret = test_bit(flag, &flow->flags);
+
+ /* Read fields of flow structure only after checking flags. */
+ smp_mb__after_atomic();
+ return ret;
+}
+
+#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
+ MLX5E_TC_FLOW_FLAG_##flag)
+
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow);
+struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow);
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow);
+
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow);
+
+#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 90930e54b6f2..f8075a604605 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -6,10 +6,32 @@
#include <net/geneve.h>
#include <net/bareudp.h>
#include "en/tc_tun.h"
+#include "en/tc_priv.h"
#include "en_tc.h"
#include "rep/tc.h"
#include "rep/neigh.h"
+struct mlx5e_tc_tun_route_attr {
+ struct net_device *out_dev;
+ struct net_device *route_dev;
+ union {
+ struct flowi4 fl4;
+ struct flowi6 fl6;
+ } fl;
+ struct neighbour *n;
+ u8 ttl;
+};
+
+#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}
+
+static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
+{
+ if (attr->n)
+ neigh_release(attr->n);
+ if (attr->route_dev)
+ dev_put(attr->route_dev);
+}
+
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
if (netif_is_vxlan(tunnel_dev))
@@ -79,12 +101,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi4 *fl4,
- struct neighbour **out_n,
- u8 *out_ttl)
+ struct mlx5e_tc_tun_route_attr *attr)
{
+ struct net_device *route_dev;
+ struct net_device *out_dev;
struct neighbour *n;
struct rtable *rt;
@@ -97,46 +117,50 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = mdev->priv.eswitch;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- fl4->flowi4_oif = uplink_dev->ifindex;
+ attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
}
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+ rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
- ip_rt_put(rt);
- return -ENETUNREACH;
+ ret = -ENETUNREACH;
+ goto err_rt_release;
}
#else
return -EOPNOTSUPP;
#endif
- ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
- if (ret < 0) {
- ip_rt_put(rt);
- return ret;
- }
- dev_hold(*route_dev);
+ ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_rt_release;
+ dev_hold(route_dev);
- if (!(*out_ttl))
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
- ip_rt_put(rt);
+ if (!attr->ttl)
+ attr->ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
if (!n) {
- dev_put(*route_dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_dev_release;
}
- *out_n = n;
+ ip_rt_put(rt);
+ attr->route_dev = route_dev;
+ attr->out_dev = out_dev;
+ attr->n = n;
return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_rt_release:
+ ip_rt_put(rt);
+ return ret;
}
-static void mlx5e_route_lookup_ipv4_put(struct net_device *route_dev,
- struct neighbour *n)
+static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
{
- neigh_release(n);
- dev_put(route_dev);
+ mlx5e_tc_tun_route_attr_cleanup(attr);
}
static const char *mlx5e_netdev_kind(struct net_device *dev)
@@ -188,28 +212,26 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
- struct net_device *out_dev, *route_dev;
- struct flowi4 fl4 = {};
- struct neighbour *n;
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
int ipv4_encap_size;
char *encap_header;
- u8 nud_state, ttl;
struct iphdr *ip;
+ u8 nud_state;
int err;
/* add the IP fields */
- fl4.flowi4_tos = tun_key->tos;
- fl4.daddr = tun_key->u.ipv4.dst;
- fl4.saddr = tun_key->u.ipv4.src;
- ttl = tun_key->ttl;
+ attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
- err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &out_dev, &route_dev,
- &fl4, &n, &ttl);
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
if (err)
return err;
ipv4_encap_size =
- (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct iphdr) +
e->tunnel->calc_hlen(e);
@@ -226,40 +248,36 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
goto release_neigh;
}
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
- e->route_dev_ifindex = route_dev->ifindex;
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
/* It's important to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
* neigh changes it's validity state, we would find the relevant neigh
* in the hash.
*/
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err)
goto free_encap;
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
/* add ethernet header */
- ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
ETH_P_IP);
/* add ip header */
ip->tos = tun_key->tos;
ip->version = 0x4;
ip->ihl = 0x5;
- ip->ttl = ttl;
- ip->daddr = fl4.daddr;
- ip->saddr = fl4.saddr;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
@@ -271,7 +289,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
+ neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
@@ -287,8 +305,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
}
e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- mlx5e_route_lookup_ipv4_put(route_dev, n);
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
return err;
destroy_neigh_entry:
@@ -296,55 +314,155 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- mlx5e_route_lookup_ipv4_put(route_dev, n);
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+}
+
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ int ipv4_encap_size;
+ char *encap_header;
+ struct iphdr *ip;
+ u8 nud_state;
+ int err;
+
+ /* add the IP fields */
+ attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = attr.ttl;
+ ip->daddr = attr.fl.fl4.daddr;
+ ip->saddr = attr.fl.fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv4_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv4_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv4_put(&attr);
return err;
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct net_device **route_dev,
- struct flowi6 *fl6,
- struct neighbour **out_n,
- u8 *out_ttl)
+ struct mlx5e_tc_tun_route_attr *attr)
{
+ struct net_device *route_dev;
+ struct net_device *out_dev;
struct dst_entry *dst;
struct neighbour *n;
-
int ret;
- dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6,
NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
- if (!(*out_ttl))
- *out_ttl = ip6_dst_hoplimit(dst);
+ if (!attr->ttl)
+ attr->ttl = ip6_dst_hoplimit(dst);
- ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
- if (ret < 0) {
- dst_release(dst);
- return ret;
- }
+ ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
+ if (ret < 0)
+ goto err_dst_release;
- dev_hold(*route_dev);
- n = dst_neigh_lookup(dst, &fl6->daddr);
- dst_release(dst);
+ dev_hold(route_dev);
+ n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
if (!n) {
- dev_put(*route_dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_dev_release;
}
- *out_n = n;
+ dst_release(dst);
+ attr->out_dev = out_dev;
+ attr->route_dev = route_dev;
+ attr->n = n;
return 0;
+
+err_dev_release:
+ dev_put(route_dev);
+err_dst_release:
+ dst_release(dst);
+ return ret;
}
-static void mlx5e_route_lookup_ipv6_put(struct net_device *route_dev,
- struct neighbour *n)
+static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
{
- neigh_release(n);
- dev_put(route_dev);
+ mlx5e_tc_tun_route_attr_cleanup(attr);
}
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
@@ -353,28 +471,25 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
- struct net_device *out_dev, *route_dev;
- struct flowi6 fl6 = {};
+ struct mlx5e_neigh m_neigh = {};
+ TC_TUN_ROUTE_ATTR_INIT(attr);
struct ipv6hdr *ip6h;
- struct neighbour *n = NULL;
int ipv6_encap_size;
char *encap_header;
- u8 nud_state, ttl;
+ u8 nud_state;
int err;
- ttl = tun_key->ttl;
+ attr.ttl = tun_key->ttl;
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
- fl6.daddr = tun_key->u.ipv6.dst;
- fl6.saddr = tun_key->u.ipv6.src;
-
- err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &out_dev, &route_dev,
- &fl6, &n, &ttl);
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
if (err)
return err;
ipv6_encap_size =
- (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct ipv6hdr) +
e->tunnel->calc_hlen(e);
@@ -391,39 +506,35 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
goto release_neigh;
}
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
- e->route_dev_ifindex = route_dev->ifindex;
+ m_neigh.family = attr.n->ops->family;
+ memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
+ e->out_dev = attr.out_dev;
+ e->route_dev_ifindex = attr.route_dev->ifindex;
/* It's importent to add the neigh to the hash table before checking
* the neigh validity state. So if we'll get a notification, in case the
* neigh changes it's validity state, we would find the relevant neigh
* in the hash.
*/
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err)
goto free_encap;
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ read_unlock_bh(&attr.n->lock);
/* add ethernet header */
- ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
ETH_P_IPV6);
/* add ip header */
ip6_flow_hdr(ip6h, tun_key->tos, 0);
/* the HW fills up ipv6 payload len */
- ip6h->hop_limit = ttl;
- ip6h->daddr = fl6.daddr;
- ip6h->saddr = fl6.saddr;
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
/* add tunneling protocol header */
err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
@@ -435,7 +546,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
e->encap_header = encap_header;
if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
+ neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
@@ -452,8 +563,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
}
e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- mlx5e_route_lookup_ipv6_put(route_dev, n);
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
return err;
destroy_neigh_entry:
@@ -461,10 +572,160 @@ destroy_neigh_entry:
free_encap:
kfree(encap_header);
release_neigh:
- mlx5e_route_lookup_ipv6_put(route_dev, n);
+ mlx5e_route_lookup_ipv6_put(&attr);
return err;
}
+
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state;
+ int err;
+
+ attr.ttl = tun_key->ttl;
+
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel->calc_hlen(e);
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ err = -EOPNOTSUPP;
+ goto release_neigh;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto release_neigh;
+ }
+
+ e->route_dev_ifindex = attr.route_dev->ifindex;
+
+ read_lock_bh(&attr.n->lock);
+ nud_state = attr.n->nud_state;
+ ether_addr_copy(e->h_dest, attr.n->ha);
+ WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+ read_unlock_bh(&attr.n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = attr.ttl;
+ ip6h->daddr = attr.fl.fl6.daddr;
+ ip6h->saddr = attr.fl.fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto free_encap;
+
+ e->encap_size = ipv6_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(attr.n, NULL);
+ /* the encap entry will be made valid on neigh update event
+ * and not used before that.
+ */
+ goto release_neigh;
+ }
+
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
+ goto free_encap;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+
+free_encap:
+ kfree(encap_header);
+release_neigh:
+ mlx5e_route_lookup_ipv6_put(&attr);
+ return err;
+}
+#endif
+
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *flow_attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ TC_TUN_ROUTE_ATTR_INIT(attr);
+ u16 vport_num;
+ int err = 0;
+
+ if (flow_attr->ip_version == 4) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
+ attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
+ err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr);
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->ip_version == 6) {
+ /* Addresses are swapped for decap */
+ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
+ attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
+ err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr);
+ }
#endif
+ else
+ return 0;
+
+ if (err)
+ return err;
+
+ if (attr.route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
+ misc_parameters.vxlan_vni);
+ esw_attr->rx_tun_attr->decap_vport = vport_num;
+
+out:
+ if (flow_attr->ip_version == 4)
+ mlx5e_route_lookup_ipv4_put(&attr);
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (flow_attr->ip_version == 6)
+ mlx5e_route_lookup_ipv6_put(&attr);
+#endif
+ return err;
+}
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev)
@@ -625,14 +886,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
}
}
- /* Enforce DMAC when offloading incoming tunneled flows.
- * Flow counters require a match on the DMAC.
- */
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16), priv->netdev->dev_addr);
-
/* let software handle IP fragments */
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 704359df6095..fa992e869044 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -59,17 +59,30 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
#else
static inline int
mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; }
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
#endif
+int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
new file mode 100644
index 000000000000..6a116335bb21
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -0,0 +1,1653 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <net/fib_notifier.h>
+#include "tc_tun_encap.h"
+#include "en_tc.h"
+#include "tc_tun.h"
+#include "rep/tc.h"
+#include "diag/en_tc_tracepoint.h"
+
+enum {
+ MLX5E_ROUTE_ENTRY_VALID = BIT(0),
+};
+
+struct mlx5e_route_key {
+ int ip_version;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } endpoint_ip;
+};
+
+struct mlx5e_route_entry {
+ struct mlx5e_route_key key;
+ struct list_head encap_entries;
+ struct list_head decap_flows;
+ u32 flags;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ int tunnel_dev_index;
+ struct rcu_head rcu;
+};
+
+struct mlx5e_tc_tun_encap {
+ struct mlx5e_priv *priv;
+ struct notifier_block fib_nb;
+ spinlock_t route_lock; /* protects route_tbl */
+ unsigned long route_tbl_last_update;
+ DECLARE_HASHTABLE(route_tbl, 8);
+};
+
+static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
+{
+ return r->flags & MLX5E_ROUTE_ENTRY_VALID;
+}
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_rx_tun_attr *tun_attr;
+ void *daddr, *saddr;
+ u8 ip_version;
+
+ tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
+ if (!tun_attr)
+ return -ENOMEM;
+
+ esw_attr->rx_tun_attr = tun_attr;
+ ip_version = mlx5e_tc_get_ip_version(spec, true);
+
+ if (ip_version == 4) {
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ tun_attr->dst_ip.v4 = *(__be32 *)daddr;
+ tun_attr->src_ip.v4 = *(__be32 *)saddr;
+ if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
+ return 0;
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (ip_version == 6) {
+ int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+ struct in6_addr zerov6 = {};
+
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
+ memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
+ if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
+ !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
+ return 0;
+ }
+#endif
+ /* Only set the flag if both src and dst ip addresses exist. They are
+ * required to establish routing.
+ */
+ flow_flag_set(flow, TUN_RX);
+ return 0;
+}
+
+static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
+{
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+
+ return all_flow_encaps_valid;
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
+ return;
+
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ e->encap_size, e->encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+ PTR_ERR(e->pkt_reformat));
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
+
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+
+ /* Do not offload flows with unresolved neighbors */
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ continue;
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ flow->rule[0] = rule;
+ /* was unset when slow path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
+
+ /* update from encap rule to slow path rule */
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+ flow->rule[0] = rule;
+ /* was unset when fast path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+
+ /* we know that the encap is valid */
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+}
+
+static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
+ struct list_head *flow_list,
+ int index)
+{
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ return;
+ wait_for_completion(&flow->init_done);
+
+ flow->tmp_entry_index = index;
+ list_add(&flow->tmp_list, flow_list);
+}
+
+/* Takes reference to all flows attached to encap and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
+{
+ struct encap_flow_item *efi;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ mlx5e_take_tmp_flow(flow, flow_list, efi->index);
+ }
+}
+
+/* Takes reference to all flows attached to route and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
+ struct list_head *flow_list)
+{
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, &r->decap_flows, decap_routes)
+ mlx5e_take_tmp_flow(flow, flow_list, 0);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_encap_entry *next = NULL;
+
+retry:
+ rcu_read_lock();
+
+ /* find encap with non-zero reference counter value */
+ for (next = e ?
+ list_next_or_null_rcu(&nhe->encap_list,
+ &e->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list) :
+ list_first_or_null_rcu(&nhe->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list);
+ next;
+ next = list_next_or_null_rcu(&nhe->encap_list,
+ &next->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list))
+ if (mlx5e_encap_take(next))
+ break;
+
+ rcu_read_unlock();
+
+ /* release starting encap */
+ if (e)
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
+ if (!next)
+ return next;
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+ if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ e = next;
+ goto retry;
+ }
+
+ return next;
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_encap_entry *e = NULL;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = ipv6_stub->nd_tbl;
+#endif
+ else
+ return;
+
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
+ * next one.
+ */
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
+ struct encap_flow_item *efi, *tmp;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+ list_add(&flow->tmp_list, &flow_list);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ counter = mlx5e_tc_get_counter(flow);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ if (neigh_used) {
+ /* release current encap before breaking the loop */
+ mlx5e_encap_put(priv, e);
+ break;
+ }
+ }
+
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ WARN_ON(!list_empty(&e->flows));
+
+ if (e->compl_result > 0) {
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ }
+
+ kfree(e->tun_info);
+ kfree(e->encap_header);
+ kfree_rcu(e, rcu);
+}
+
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index);
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow, int out_index)
+{
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (flow->attr->esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5e_detach_encap_route(priv, flow, out_index);
+
+ /* flow wasn't fully initialized */
+ if (!e)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->encaps[out_index].list);
+ flow->encaps[out_index].e = NULL;
+ if (!refcount_dec_and_test(&e->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+struct encap_key {
+ const struct ip_tunnel_key *ip_tun_key;
+ struct mlx5e_tc_tunnel *tc_tunnel;
+};
+
+static int cmp_encap_info(struct encap_key *a,
+ struct encap_key *b)
+{
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
+ a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
+}
+
+static int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
+static int hash_encap_info(struct encap_key *key)
+{
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tc_tunnel->tunnel_type);
+}
+
+static int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
+
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e;
+ struct encap_key e_key;
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
+ if (!cmp_encap_info(&e_key, key) &&
+ mlx5e_encap_take(e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < out_index; i++) {
+ if (flow->encaps[i].e != e)
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ attr->dest_chain = 0;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
+ vport_num);
+ err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err >= 0) {
+ esw_attr->dests[out_index].src_port_rewrite_act_id = err;
+ err = 0;
+ }
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ int act_id = attr->dests[out_index].src_port_rewrite_act_id;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
+ vport_num);
+ mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ unsigned int ret;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ spin_lock_bh(&encap->route_lock);
+ ret = encap->route_tbl_last_update;
+ spin_unlock_bh(&encap->route_lock);
+ return ret;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev,
+ bool *encap_valid)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ const struct ip_tunnel_info *tun_info;
+ unsigned long tbl_time_before = 0;
+ struct encap_key key;
+ struct mlx5e_encap_entry *e;
+ bool entry_created = false;
+ unsigned short family;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+ key.ip_tun_key = &tun_info->key;
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+ if (!key.tc_tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(&key);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ e = mlx5e_encap_get(priv, &key, hash_key);
+
+ /* must verify if encap is valid or not */
+ if (e) {
+ /* Check that entry was not already attached to this flow */
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ wait_for_completion(&e->res_ready);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (e->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
+ goto attach_flow;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ refcount_set(&e->refcnt, 1);
+ init_completion(&e->res_ready);
+ entry_created = true;
+ INIT_LIST_HEAD(&e->route_list);
+
+ tun_info = mlx5e_dup_tun_info(tun_info);
+ if (!tun_info) {
+ err = -ENOMEM;
+ goto out_err_init;
+ }
+ e->tun_info = tun_info;
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err_init;
+
+ INIT_LIST_HEAD(&e->flows);
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ if (family == AF_INET)
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ complete_all(&e->res_ready);
+ if (err) {
+ e->compl_result = err;
+ goto out_err;
+ }
+ e->compl_result = 1;
+
+attach_flow:
+ err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
+ out_index);
+ if (err)
+ goto out_err;
+
+ flow->encaps[out_index].e = e;
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ *encap_valid = true;
+ } else {
+ *encap_valid = false;
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (e)
+ mlx5e_encap_put(priv, e);
+ return err;
+
+out_err_init:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ kfree(tun_info);
+ kfree(e);
+ return err;
+}
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = flow->attr->parse_attr;
+ if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = parse_attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
+ sizeof(parse_attr->eth),
+ &parse_attr->eth,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}
+
+static int cmp_route_info(struct mlx5e_route_key *a,
+ struct mlx5e_route_key *b)
+{
+ if (a->ip_version == 4 && b->ip_version == 4)
+ return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
+ sizeof(a->endpoint_ip.v4));
+ else if (a->ip_version == 6 && b->ip_version == 6)
+ return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
+ sizeof(a->endpoint_ip.v6));
+ return 1;
+}
+
+static u32 hash_route_info(struct mlx5e_route_key *key)
+{
+ if (key->ip_version == 4)
+ return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
+ return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
+}
+
+static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r)
+{
+ WARN_ON(!list_empty(&r->decap_flows));
+ WARN_ON(!list_empty(&r->encap_entries));
+
+ kfree_rcu(r, rcu);
+}
+
+static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
+ if (!refcount_dec_and_test(&r->refcnt))
+ return;
+ hash_del_rcu(&r->hlist);
+ mlx5e_route_dealloc(priv, r);
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
+ u32 hash_key)
+{
+ struct mlx5e_route_key r_key;
+ struct mlx5e_route_entry *r;
+
+ hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
+ r_key = r->key;
+ if (!cmp_route_info(&r_key, key) &&
+ refcount_inc_not_zero(&r->refcnt))
+ return r;
+ }
+ return NULL;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_get_create(struct mlx5e_priv *priv,
+ struct mlx5e_route_key *key,
+ int tunnel_dev_index,
+ unsigned long *route_tbl_change_time)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_tc_tun_encap *encap;
+ struct mlx5e_route_entry *r;
+ u32 hash_key;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ encap = uplink_priv->encap;
+
+ hash_key = hash_route_info(key);
+ spin_lock_bh(&encap->route_lock);
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+ if (r) {
+ if (!mlx5e_route_entry_valid(r)) {
+ mlx5e_route_put_locked(priv, r);
+ return ERR_PTR(-EINVAL);
+ }
+ return r;
+ }
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ r->key = *key;
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+ r->tunnel_dev_index = tunnel_dev_index;
+ refcount_set(&r->refcnt, 1);
+ INIT_LIST_HEAD(&r->decap_flows);
+ INIT_LIST_HEAD(&r->encap_entries);
+
+ spin_lock_bh(&encap->route_lock);
+ *route_tbl_change_time = encap->route_tbl_last_update;
+ hash_add(encap->route_tbl, &r->hlist, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+static struct mlx5e_route_entry *
+mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
+{
+ u32 hash_key = hash_route_info(key);
+ struct mlx5e_route_entry *r;
+
+ spin_lock_bh(&encap->route_lock);
+ encap->route_tbl_last_update = jiffies;
+ r = mlx5e_route_get(encap, key, hash_key);
+ spin_unlock_bh(&encap->route_lock);
+
+ return r;
+}
+
+struct mlx5e_tc_fib_event_data {
+ struct work_struct work;
+ unsigned long event;
+ struct mlx5e_route_entry *r;
+ struct net_device *ul_dev;
+};
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work);
+static struct mlx5e_tc_fib_event_data *
+mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+
+ fib_work = kzalloc(sizeof(*fib_work), flags);
+ if (WARN_ON(!fib_work))
+ return NULL;
+
+ INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
+ fib_work->event = event;
+ fib_work->ul_dev = ul_dev;
+
+ return fib_work;
+}
+
+static int
+mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ unsigned long event)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct net_device *ul_dev;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ ul_dev = uplink_rpriv->netdev;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
+ if (!fib_work)
+ return -ENOMEM;
+
+ dev_hold(ul_dev);
+ refcount_inc(&r->refcnt);
+ fib_work->r = r;
+ queue_work(priv->wq, &fib_work->work);
+
+ return 0;
+}
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_before, tbl_time_after;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (!esw_attr->rx_tun_attr)
+ goto out;
+
+ tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
+ tbl_time_after = tbl_time_before;
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
+ if (err || !esw_attr->rx_tun_attr->decap_vport)
+ goto out;
+
+ key.ip_version = attr->ip_version;
+ if (key.ip_version == 4)
+ key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
+ else
+ key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
+ &tbl_time_after);
+ if (IS_ERR(r)) {
+ err = PTR_ERR(r);
+ goto out;
+ }
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ goto out;
+ }
+ }
+
+ flow->decap_route = r;
+ list_add(&flow->decap_routes, &r->decap_flows);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return 0;
+
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return err;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_encap_entry *e,
+ bool new_encap_entry,
+ unsigned long tbl_time_before,
+ int out_index)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned long tbl_time_after = tbl_time_before;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ unsigned short family;
+ int err = 0;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+
+ if (family == AF_INET) {
+ key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
+ key.ip_version = 4;
+ } else if (family == AF_INET6) {
+ key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
+ key.ip_version = 6;
+ }
+
+ err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+ e->route_dev_ifindex, out_index);
+ if (err || !(esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
+ return err;
+
+ r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
+ &tbl_time_after);
+ if (IS_ERR(r))
+ return PTR_ERR(r);
+ /* Routing changed concurrently. FIB event handler might have missed new
+ * entry, schedule update.
+ */
+ if (tbl_time_before != tbl_time_after) {
+ err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
+ if (err) {
+ mlx5e_route_put_locked(priv, r);
+ return err;
+ }
+ }
+
+ flow->encap_routes[out_index].r = r;
+ if (new_encap_entry)
+ list_add(&e->route_list, &r->encap_entries);
+ flow->encap_routes[out_index].index = out_index;
+ return 0;
+}
+
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_route_entry *r = flow->decap_route;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->decap_routes);
+ flow->decap_route = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index)
+{
+ struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e, *tmp;
+
+ if (!r)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ flow->encap_routes[out_index].r = NULL;
+
+ if (!refcount_dec_and_test(&r->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
+ list_del_init(&e->route_list);
+ hash_del_rcu(&r->hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_route_dealloc(priv, r);
+}
+
+static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ esw_attr = attr->esw_attr;
+
+ if (flow_flag_test(flow, SLOW))
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ attr->modify_hdr = NULL;
+
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+ }
+
+ e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
+ }
+}
+
+static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
+ struct net_device *tunnel_dev,
+ struct mlx5e_encap_entry *e,
+ struct list_head *encap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
+ mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
+ mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
+ e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
+
+ list_for_each_entry(flow, encap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+ spec = &parse_attr->spec;
+
+ err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
+ e->out_dev, e->route_dev_ifindex,
+ flow->tmp_entry_index);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
+ continue;
+ }
+
+ err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
+ err);
+ continue;
+ }
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
+ goto offload_to_slow_path;
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ } else {
+offload_to_slow_path:
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_entry_index].flags &=
+ ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ }
+ }
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ struct mlx5e_encap_entry *e;
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ list_for_each_entry(e, &r->encap_entries, route_list) {
+ LIST_HEAD(encap_flows);
+
+ mlx5e_take_all_encap_flows(e, &encap_flows);
+ if (list_empty(&encap_flows))
+ continue;
+
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_invalidate_encap(priv, e, &encap_flows);
+
+ if (!replace) {
+ list_splice(&encap_flows, flow_list);
+ continue;
+ }
+
+ mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
+ list_splice(&encap_flows, flow_list);
+ }
+
+ return 0;
+}
+
+static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, flow_list, tmp_list)
+ if (mlx5e_is_offloaded_flow(flow))
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+}
+
+static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
+ struct list_head *decap_flows)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, decap_flows, tmp_list) {
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ if (flow_flag_test(flow, FAILED))
+ continue;
+
+ parse_attr = attr->parse_attr;
+ spec = &parse_attr->spec;
+ err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
+ err);
+ continue;
+ }
+
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
+ err);
+ } else {
+ flow->rule[0] = rule;
+ flow_flag_set(flow, OFFLOADED);
+ }
+ }
+}
+
+static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
+ struct mlx5e_route_entry *r,
+ struct list_head *flow_list,
+ bool replace)
+{
+ struct net_device *tunnel_dev;
+ LIST_HEAD(decap_flows);
+
+ tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
+ if (!tunnel_dev)
+ return -ENODEV;
+
+ mlx5e_take_all_route_decap_flows(r, &decap_flows);
+ if (mlx5e_route_entry_valid(r))
+ mlx5e_unoffload_flow_list(priv, &decap_flows);
+ if (replace)
+ mlx5e_reoffload_decap(priv, &decap_flows);
+
+ list_splice(&decap_flows, flow_list);
+
+ return 0;
+}
+
+static void mlx5e_tc_fib_event_work(struct work_struct *work)
+{
+ struct mlx5e_tc_fib_event_data *event_data =
+ container_of(work, struct mlx5e_tc_fib_event_data, work);
+ struct net_device *ul_dev = event_data->ul_dev;
+ struct mlx5e_priv *priv = netdev_priv(ul_dev);
+ struct mlx5e_route_entry *r = event_data->r;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+ bool replace;
+ int err;
+
+ /* sync with concurrent neigh updates */
+ rtnl_lock();
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
+
+ if (!mlx5e_route_entry_valid(r) && !replace)
+ goto out;
+
+ err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
+ err);
+
+ err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
+ if (err)
+ mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
+ err);
+
+ if (replace)
+ r->flags |= MLX5E_ROUTE_ENTRY_VALID;
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ rtnl_unlock();
+
+ mlx5e_put_flow_list(priv, &flow_list);
+ mlx5e_route_put(priv, event_data->r);
+ dev_put(event_data->ul_dev);
+ kfree(event_data);
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib_entry_notifier_info, info);
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->dst_len != 32)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ key.endpoint_ip.v4 = htonl(fen_info->dst);
+ key.ip_version = 4;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static struct mlx5e_tc_fib_event_data *
+mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
+ struct net_device *ul_dev,
+ struct mlx5e_tc_tun_encap *encap,
+ unsigned long event,
+ struct fib_notifier_info *info)
+{
+ struct fib6_entry_notifier_info *fen_info;
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct mlx5e_route_entry *r;
+ struct mlx5e_route_key key;
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib6_entry_notifier_info, info);
+ fib_dev = fib6_info_nh_dev(fen_info->rt);
+ if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->rt->fib6_dst.plen != 128)
+ return NULL;
+
+ fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
+ if (!fib_work)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
+ sizeof(fen_info->rt->fib6_dst.addr));
+ key.ip_version = 6;
+
+ /* Can't fail after this point because releasing reference to r
+ * requires obtaining sleeping mutex which we can't do in atomic
+ * context.
+ */
+ r = mlx5e_route_lookup_for_update(encap, &key);
+ if (!r)
+ goto out;
+ fib_work->r = r;
+ dev_hold(ul_dev);
+
+ return fib_work;
+
+out:
+ kfree(fib_work);
+ return NULL;
+}
+
+static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct mlx5e_tc_fib_event_data *fib_work;
+ struct fib_notifier_info *info = ptr;
+ struct mlx5e_tc_tun_encap *encap;
+ struct net_device *ul_dev;
+ struct mlx5e_priv *priv;
+
+ encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
+ priv = encap->priv;
+ ul_dev = priv->netdev;
+ priv = netdev_priv(ul_dev);
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ if (info->family == AF_INET)
+ fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
+ else if (info->family == AF_INET6)
+ fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
+ else
+ return NOTIFY_DONE;
+
+ if (!IS_ERR_OR_NULL(fib_work)) {
+ queue_work(priv->wq, &fib_work->work);
+ } else if (IS_ERR(fib_work)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
+ mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
+ PTR_ERR(fib_work));
+ }
+
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_DONE;
+}
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_tun_encap *encap;
+ int err;
+
+ encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
+ if (!encap)
+ return ERR_PTR(-ENOMEM);
+
+ encap->priv = priv;
+ encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
+ spin_lock_init(&encap->route_lock);
+ hash_init(encap->route_tbl);
+ err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
+ NULL, NULL);
+ if (err) {
+ kvfree(encap);
+ return ERR_PTR(err);
+ }
+
+ return encap;
+}
+
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
+{
+ if (!encap)
+ return;
+
+ unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
+ flush_workqueue(encap->priv->wq); /* flush fib event works */
+ kvfree(encap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
new file mode 100644
index 000000000000..3391504d9a08
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUN_ENCAP_H__
+#define __MLX5_EN_TC_TUN_ENCAP_H__
+
+#include "tc_priv.h"
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow, int out_index);
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev,
+ bool *encap_valid);
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec);
+
+struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv);
+void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap);
+
+#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 988195ab1c54..d1696404cca9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -59,6 +59,8 @@ struct mlx5e_neigh_update_table {
struct mlx5_tc_ct_priv;
struct mlx5e_rep_bond;
+struct mlx5e_tc_tun_encap;
+
struct mlx5_rep_uplink_priv {
/* Filters DB - instantiated by the uplink representor and shared by
* the uplink's VFs
@@ -90,6 +92,9 @@ struct mlx5_rep_uplink_priv {
/* support eswitch vports bonding */
struct mlx5e_rep_bond *bond;
+
+ /* tc tunneling encapsulation private data */
+ struct mlx5e_tc_tun_encap *encap;
};
struct mlx5e_rep_priv {
@@ -110,7 +115,6 @@ struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
}
struct mlx5e_neigh {
- struct net_device *dev;
union {
__be32 v4;
struct in6_addr v6;
@@ -122,6 +126,7 @@ struct mlx5e_neigh_hash_entry {
struct rhash_head rhash_node;
struct mlx5e_neigh m_neigh;
struct mlx5e_priv *priv;
+ struct net_device *neigh_dev;
/* Save the neigh hash entry in a list on the representor in
* addition to the hash table. In order to iterate easily over the
@@ -153,6 +158,7 @@ enum {
/* set when the encap entry is successfully offloaded into HW */
MLX5_ENCAP_ENTRY_VALID = BIT(0),
MLX5_REFORMAT_DECAP = BIT(1),
+ MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2),
};
struct mlx5e_decap_key {
@@ -175,12 +181,12 @@ struct mlx5e_encap_entry {
struct mlx5e_neigh_hash_entry *nhe;
/* neigh hash entry list of encaps sharing the same neigh */
struct list_head encap_list;
- struct mlx5e_neigh m_neigh;
/* a node of the eswitch encap hash table which keeping all the encap
* entries
*/
struct hlist_node encap_hlist;
struct list_head flows;
+ struct list_head route_list;
struct mlx5_pkt_reformat *pkt_reformat;
const struct ip_tunnel_info *tun_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 280ea1e1e039..db142ee96510 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -63,6 +63,8 @@
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
+#include "en/tc_priv.h"
+#include "en/tc_tun_encap.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
@@ -71,90 +73,6 @@
#define nic_chains(priv) ((priv)->fs.tc.chains)
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
-
-enum {
- MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
- MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
- MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
- MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
- MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
- MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
- MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
- MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
- MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
- MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
- MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
-};
-
-#define MLX5E_TC_MAX_SPLITS 1
-
-/* Helper struct for accessing a struct containing list_head array.
- * Containing struct
- * |- Helper array
- * [0] Helper item 0
- * |- list_head item 0
- * |- index (0)
- * [1] Helper item 1
- * |- list_head item 1
- * |- index (1)
- * To access the containing struct from one of the list_head items:
- * 1. Get the helper item from the list_head item using
- * helper item =
- * container_of(list_head item, helper struct type, list_head field)
- * 2. Get the contining struct from the helper item and its index in the array:
- * containing struct =
- * container_of(helper item, containing struct type, helper field[index])
- */
-struct encap_flow_item {
- struct mlx5e_encap_entry *e; /* attached encap instance */
- struct list_head list;
- int index;
-};
-
-struct mlx5e_tc_flow {
- struct rhash_head node;
- struct mlx5e_priv *priv;
- u64 cookie;
- unsigned long flags;
- struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
-
- /* flows sharing the same reformat object - currently mpls decap */
- struct list_head l3_to_l2_reformat;
- struct mlx5e_decap_entry *decap_reformat;
-
- /* Flow can be associated with multiple encap IDs.
- * The number of encaps is bounded by the number of supported
- * destinations.
- */
- struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5e_tc_flow *peer_flow;
- struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
- struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
- struct list_head hairpin; /* flows sharing the same hairpin */
- struct list_head peer; /* flows with peer flow */
- struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
- struct net_device *orig_dev; /* netdev adding flow first */
- int tmp_efi_index;
- struct list_head tmp_list; /* temporary flow list used by neigh update */
- refcount_t refcnt;
- struct rcu_head rcu_head;
- struct completion init_done;
- int tunnel_id; /* the mapped tunnel id of this flow */
- struct mlx5_flow_attr *attr;
-};
-
-struct mlx5e_tc_flow_parse_attr {
- const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
- struct net_device *filter_dev;
- struct mlx5_flow_spec spec;
- struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
- int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
- struct ethhdr eth;
-};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
@@ -165,10 +83,15 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
.moffset = 0,
.mlen = 2,
},
+ [VPORT_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+ .moffset = 2,
+ .mlen = 2,
+ },
[TUNNEL_TO_REG] = {
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
.moffset = 1,
- .mlen = 3,
+ .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8),
.soffset = MLX5_BYTE_OFF(fte_match_param,
misc_parameters_2.metadata_reg_c_1),
},
@@ -247,11 +170,11 @@ mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
}
int
-mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
- enum mlx5_flow_namespace_type ns,
- enum mlx5e_tc_attr_to_reg type,
- u32 data)
+mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
{
int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
@@ -275,9 +198,10 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
MLX5_SET(set_action_in, modact, offset, moffset * 8);
MLX5_SET(set_action_in, modact, length, mlen * 8);
MLX5_SET(set_action_in, modact, data, data);
+ err = mod_hdr_acts->num_actions;
mod_hdr_acts->num_actions++;
- return 0;
+ return err;
}
static struct mlx5_tc_ct_priv *
@@ -326,6 +250,41 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}
+int
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
+{
+ int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
+
+ return ret < 0 ? ret : 0;
+}
+
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5e_tc_attr_to_reg type,
+ int act_id, u32 data)
+{
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ char *modact;
+
+ modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);
+
+ /* Firmware has 5bit length field and 0 means 32bits */
+ if (mlen == 4)
+ mlen = 0;
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field, mfield);
+ MLX5_SET(set_action_in, modact, offset, moffset * 8);
+ MLX5_SET(set_action_in, modact, length, mlen * 8);
+ MLX5_SET(set_action_in, modact, data, data);
+}
+
struct mlx5e_hairpin {
struct mlx5_hairpin *pair;
@@ -363,15 +322,14 @@ struct mlx5e_hairpin_entry {
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
-static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
if (!flow || !refcount_inc_not_zero(&flow->refcnt))
return ERR_PTR(-EINVAL);
return flow;
}
-static void mlx5e_flow_put(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
if (refcount_dec_and_test(&flow->refcnt)) {
mlx5e_tc_del_flow(priv, flow);
@@ -379,48 +337,6 @@ static void mlx5e_flow_put(struct mlx5e_priv *priv,
}
}
-static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
- /* Complete all memory stores before setting bit. */
- smp_mb__before_atomic();
- set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
- unsigned long flag)
-{
- /* test_and_set_bit() provides all necessary barriers */
- return test_and_set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_test_and_set(flow, flag) \
- __flow_flag_test_and_set(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
-
-static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
- /* Complete all memory stores before clearing bit. */
- smp_mb__before_atomic();
- clear_bit(flag, &flow->flags);
-}
-
-#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
- bool ret = test_bit(flag, &flow->flags);
-
- /* Read fields of flow structure only after checking flags. */
- smp_mb__after_atomic();
- return ret;
-}
-
-#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
-
bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, ESWITCH);
@@ -431,7 +347,7 @@ static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
return flow_flag_test(flow, FT);
}
-static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, OFFLOADED);
}
@@ -1146,23 +1062,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
kfree(flow->attr);
}
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index);
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct net_device *mirred_dev,
- int out_index,
- struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid);
-static int mlx5e_attach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack);
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow);
-
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
@@ -1197,10 +1097,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
return rule;
}
-static void
-mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
- struct mlx5e_tc_flow *flow,
- struct mlx5_flow_attr *attr)
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
{
flow_flag_clear(flow, OFFLOADED);
@@ -1219,7 +1118,7 @@ offload_rule_0:
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec)
@@ -1245,9 +1144,8 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
return rule;
}
-static void
-mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
- struct mlx5e_tc_flow *flow)
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow)
{
struct mlx5_flow_attr *slow_attr;
@@ -1315,6 +1213,63 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
mutex_unlock(&uplink_priv->unready_flows_lock);
}
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
+
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
+{
+ struct mlx5_core_dev *out_mdev, *route_mdev;
+ struct mlx5e_priv *out_priv, *route_priv;
+
+ out_priv = netdev_priv(out_dev);
+ out_mdev = out_priv->mdev;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
+ route_mdev->coredev_type != MLX5_COREDEV_VF)
+ return false;
+
+ return same_hw_devs(out_priv, route_priv);
+}
+
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+{
+ struct mlx5e_priv *out_priv, *route_priv;
+ struct mlx5_core_dev *route_mdev;
+ struct mlx5_eswitch *esw;
+ u16 vhca_id;
+ int err;
+
+ out_priv = netdev_priv(out_dev);
+ esw = out_priv->mdev->priv.eswitch;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+ err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ return err;
+}
+
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
+ struct mlx5_modify_hdr *mod_hdr;
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev,
+ get_flow_name_space(flow),
+ mod_hdr_acts->num_actions,
+ mod_hdr_acts->actions);
+ if (IS_ERR(mod_hdr))
+ return PTR_ERR(mod_hdr);
+
+ WARN_ON(flow->attr->modify_hdr);
+ flow->attr->modify_hdr = mod_hdr;
+
+ return 0;
+}
+
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
@@ -1324,11 +1279,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct net_device *out_dev, *encap_dev = NULL;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
+ bool vf_tun = false, encap_valid = true;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
- bool encap_valid = true;
u32 max_prio, max_chain;
int err = 0;
int out_index;
@@ -1342,20 +1297,28 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
NL_SET_ERR_MSG_MOD(extack,
"Requested chain is out of supported range");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto err_out;
}
max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
if (attr->prio > max_prio) {
NL_SET_ERR_MSG_MOD(extack,
"Requested priority is out of supported range");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (flow_flag_test(flow, TUN_RX)) {
+ err = mlx5e_attach_decap_route(priv, flow);
+ if (err)
+ goto err_out;
}
if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
err = mlx5e_attach_decap(priv, flow, extack);
if (err)
- return err;
+ goto err_out;
}
parse_attr = attr->parse_attr;
@@ -1373,8 +1336,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
extack, &encap_dev, &encap_valid);
if (err)
- return err;
+ goto err_out;
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ vf_tun = true;
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
esw_attr->dests[out_index].rep = rpriv->rep;
@@ -1383,20 +1349,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err)
- return err;
+ goto err_out;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
!(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
- err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
- if (err)
- return err;
+ if (vf_tun) {
+ err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ if (err)
+ goto err_out;
+ } else {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ if (err)
+ goto err_out;
+ }
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(esw_attr->counter_dev, true);
- if (IS_ERR(counter))
- return PTR_ERR(counter);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_out;
+ }
attr->counter = counter;
}
@@ -1410,12 +1383,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
else
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
- if (IS_ERR(flow->rule[0]))
- return PTR_ERR(flow->rule[0]);
- else
- flow_flag_set(flow, OFFLOADED);
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ goto err_out;
+ }
+ flow_flag_set(flow, OFFLOADED);
return 0;
+
+err_out:
+ flow_flag_set(flow, FAILED);
+ return err;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
@@ -1436,8 +1414,11 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ bool vf_tun = false;
int out_index;
+ esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
if (flow_flag_test(flow, NOT_READY))
@@ -1455,20 +1436,33 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
- if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
+ if (flow->decap_route)
+ mlx5e_detach_decap_route(priv, flow);
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ vf_tun = true;
+ if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
mlx5e_detach_encap(priv, flow, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
- kvfree(attr->parse_attr);
+ }
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- mlx5e_detach_mod_hdr(priv, flow);
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts);
+ if (vf_tun && attr->modify_hdr)
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ else
+ mlx5e_detach_mod_hdr(priv, flow);
+ }
+ kvfree(attr->parse_attr);
+ kvfree(attr->esw_attr->rx_tun_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
+ mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
@@ -1476,141 +1470,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
kfree(flow->attr);
}
-void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct list_head *flow_list)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_attr *attr;
- struct mlx5_flow_spec *spec;
- struct mlx5e_tc_flow *flow;
- int err;
-
- e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- e->encap_size, e->encap_header,
- MLX5_FLOW_NAMESPACE_FDB);
- if (IS_ERR(e->pkt_reformat)) {
- mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
- PTR_ERR(e->pkt_reformat));
- return;
- }
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(priv);
-
- list_for_each_entry(flow, flow_list, tmp_list) {
- bool all_flow_encaps_valid = true;
- int i;
-
- if (!mlx5e_is_offloaded_flow(flow))
- continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
-
- esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
- esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
- /* Flow can be associated with multiple encap entries.
- * Before offloading the flow verify that all of them have
- * a valid neighbour.
- */
- for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
- if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
- continue;
- if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
- all_flow_encaps_valid = false;
- break;
- }
- }
- /* Do not offload flows with unresolved neighbors */
- if (!all_flow_encaps_valid)
- continue;
- /* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
- err);
- continue;
- }
-
- mlx5e_tc_unoffload_from_slow_path(esw, flow);
- flow->rule[0] = rule;
- /* was unset when slow path rule removed */
- flow_flag_set(flow, OFFLOADED);
- }
-}
-
-void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct list_head *flow_list)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_attr *attr;
- struct mlx5_flow_spec *spec;
- struct mlx5e_tc_flow *flow;
- int err;
-
- list_for_each_entry(flow, flow_list, tmp_list) {
- if (!mlx5e_is_offloaded_flow(flow))
- continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
-
- /* update from encap rule to slow path rule */
- rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
- /* mark the flow's encap dest as non-valid */
- esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
-
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
- err);
- continue;
- }
-
- mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
- flow->rule[0] = rule;
- /* was unset when fast path rule removed */
- flow_flag_set(flow, OFFLOADED);
- }
-
- /* we know that the encap is valid */
- e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
- mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
-}
-
-static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
return flow->attr->counter;
}
-/* Takes reference to all flows attached to encap and adds the flows to
- * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
- */
-void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
-{
- struct encap_flow_item *efi;
- struct mlx5e_tc_flow *flow;
-
- list_for_each_entry(efi, &e->flows, list) {
- flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
- if (IS_ERR(mlx5e_flow_get(flow)))
- continue;
- wait_for_completion(&flow->init_done);
-
- flow->tmp_efi_index = efi->index;
- list_add(&flow->tmp_list, flow_list);
- }
-}
-
/* Iterate over tmp_list of flows attached to flow_list head. */
-void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
struct mlx5e_tc_flow *flow, *tmp;
@@ -1618,222 +1484,6 @@ void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_l
mlx5e_flow_put(priv, flow);
}
-static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
- struct mlx5e_encap_entry *e)
-{
- struct mlx5e_encap_entry *next = NULL;
-
-retry:
- rcu_read_lock();
-
- /* find encap with non-zero reference counter value */
- for (next = e ?
- list_next_or_null_rcu(&nhe->encap_list,
- &e->encap_list,
- struct mlx5e_encap_entry,
- encap_list) :
- list_first_or_null_rcu(&nhe->encap_list,
- struct mlx5e_encap_entry,
- encap_list);
- next;
- next = list_next_or_null_rcu(&nhe->encap_list,
- &next->encap_list,
- struct mlx5e_encap_entry,
- encap_list))
- if (mlx5e_encap_take(next))
- break;
-
- rcu_read_unlock();
-
- /* release starting encap */
- if (e)
- mlx5e_encap_put(netdev_priv(e->out_dev), e);
- if (!next)
- return next;
-
- /* wait for encap to be fully initialized */
- wait_for_completion(&next->res_ready);
- /* continue searching if encap entry is not in valid state after completion */
- if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
- e = next;
- goto retry;
- }
-
- return next;
-}
-
-void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
- struct mlx5e_encap_entry *e = NULL;
- struct mlx5e_tc_flow *flow;
- struct mlx5_fc *counter;
- struct neigh_table *tbl;
- bool neigh_used = false;
- struct neighbour *n;
- u64 lastuse;
-
- if (m_neigh->family == AF_INET)
- tbl = &arp_tbl;
-#if IS_ENABLED(CONFIG_IPV6)
- else if (m_neigh->family == AF_INET6)
- tbl = ipv6_stub->nd_tbl;
-#endif
- else
- return;
-
- /* mlx5e_get_next_valid_encap() releases previous encap before returning
- * next one.
- */
- while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
- struct mlx5e_priv *priv = netdev_priv(e->out_dev);
- struct encap_flow_item *efi, *tmp;
- struct mlx5_eswitch *esw;
- LIST_HEAD(flow_list);
-
- esw = priv->mdev->priv.eswitch;
- mutex_lock(&esw->offloads.encap_tbl_lock);
- list_for_each_entry_safe(efi, tmp, &e->flows, list) {
- flow = container_of(efi, struct mlx5e_tc_flow,
- encaps[efi->index]);
- if (IS_ERR(mlx5e_flow_get(flow)))
- continue;
- list_add(&flow->tmp_list, &flow_list);
-
- if (mlx5e_is_offloaded_flow(flow)) {
- counter = mlx5e_tc_get_counter(flow);
- lastuse = mlx5_fc_query_lastuse(counter);
- if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
- neigh_used = true;
- break;
- }
- }
- }
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- mlx5e_put_encap_flow_list(priv, &flow_list);
- if (neigh_used) {
- /* release current encap before breaking the loop */
- mlx5e_encap_put(priv, e);
- break;
- }
- }
-
- trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
-
- if (neigh_used) {
- nhe->reported_lastuse = jiffies;
-
- /* find the relevant neigh according to the cached device and
- * dst ip pair
- */
- n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
- if (!n)
- return;
-
- neigh_event_send(n, NULL);
- neigh_release(n);
- }
-}
-
-static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
- WARN_ON(!list_empty(&e->flows));
-
- if (e->compl_result > 0) {
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
- }
-
- kfree(e->tun_info);
- kfree(e->encap_header);
- kfree_rcu(e, rcu);
-}
-
-static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
- struct mlx5e_decap_entry *d)
-{
- WARN_ON(!list_empty(&d->flows));
-
- if (!d->compl_result)
- mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
-
- kfree_rcu(d, rcu);
-}
-
-void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
- return;
- hash_del_rcu(&e->encap_hlist);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
- return;
- hash_del_rcu(&d->hlist);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
-
- mlx5e_decap_dealloc(priv, d);
-}
-
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index)
-{
- struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- /* flow wasn't fully initialized */
- if (!e)
- return;
-
- mutex_lock(&esw->offloads.encap_tbl_lock);
- list_del(&flow->encaps[out_index].list);
- flow->encaps[out_index].e = NULL;
- if (!refcount_dec_and_test(&e->refcnt)) {
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- return;
- }
- hash_del_rcu(&e->encap_hlist);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_decap_entry *d = flow->decap_reformat;
-
- if (!d)
- return;
-
- mutex_lock(&esw->offloads.decap_tbl_lock);
- list_del(&flow->l3_to_l2_reformat);
- flow->decap_reformat = NULL;
-
- if (!refcount_dec_and_test(&d->refcnt)) {
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- return;
- }
- hash_del_rcu(&d->hlist);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
-
- mlx5e_decap_dealloc(priv, d);
-}
-
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
@@ -2091,6 +1741,29 @@ void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
}
}
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
+{
+ void *headers_v;
+ u16 ethertype;
+ u8 ip_version;
+
+ if (outer)
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+ else
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+
+ ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
+ /* Return ip_version converted from ethertype anyway */
+ if (!ip_version) {
+ ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+ if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
+ ip_version = 4;
+ else if (ethertype == ETH_P_IPV6)
+ ip_version = 6;
+ }
+ return ip_version;
+}
+
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
@@ -2099,6 +1772,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
u8 *match_level,
bool *match_inner)
{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
bool needs_mapping, sets_mapping;
@@ -2136,6 +1810,31 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
*/
if (!netif_is_bareudp(filter_dev))
flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ err = mlx5e_tc_set_attr_rx_tun(flow, spec);
+ if (err)
+ return err;
+ } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ struct mlx5_flow_spec *tmp_spec;
+
+ tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
+ if (!tmp_spec) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
+ netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
+ return -ENOMEM;
+ }
+ memcpy(tmp_spec, spec, sizeof(*tmp_spec));
+
+ err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
+ if (err) {
+ kvfree(tmp_spec);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
+ netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
+ return err;
+ }
+ err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
+ kvfree(tmp_spec);
+ if (err)
+ return err;
}
if (!needs_mapping && !sets_mapping)
@@ -3584,35 +3283,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
return 0;
}
-struct encap_key {
- const struct ip_tunnel_key *ip_tun_key;
- struct mlx5e_tc_tunnel *tc_tunnel;
-};
-
-static inline int cmp_encap_info(struct encap_key *a,
- struct encap_key *b)
-{
- return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
- a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
-}
-
-static inline int cmp_decap_info(struct mlx5e_decap_key *a,
- struct mlx5e_decap_key *b)
-{
- return memcmp(&a->key, &b->key, sizeof(b->key));
-}
-
-static inline int hash_encap_info(struct encap_key *key)
-{
- return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
- key->tc_tunnel->tunnel_type);
-}
-
-static inline int hash_decap_info(struct mlx5e_decap_key *key)
-{
- return jhash(&key->key, sizeof(key->key), 0);
-}
-
static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
struct net_device *peer_netdev)
{
@@ -3626,277 +3296,6 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
same_hw_devs(priv, peer_priv));
}
-bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
-{
- return refcount_inc_not_zero(&e->refcnt);
-}
-
-static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
-{
- return refcount_inc_not_zero(&e->refcnt);
-}
-
-static struct mlx5e_encap_entry *
-mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
- uintptr_t hash_key)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_encap_entry *e;
- struct encap_key e_key;
-
- hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
- encap_hlist, hash_key) {
- e_key.ip_tun_key = &e->tun_info->key;
- e_key.tc_tunnel = e->tunnel;
- if (!cmp_encap_info(&e_key, key) &&
- mlx5e_encap_take(e))
- return e;
- }
-
- return NULL;
-}
-
-static struct mlx5e_decap_entry *
-mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
- uintptr_t hash_key)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_decap_key r_key;
- struct mlx5e_decap_entry *e;
-
- hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
- hlist, hash_key) {
- r_key = e->key;
- if (!cmp_decap_info(&r_key, key) &&
- mlx5e_decap_take(e))
- return e;
- }
- return NULL;
-}
-
-static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
-{
- size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
-
- return kmemdup(tun_info, tun_size, GFP_KERNEL);
-}
-
-static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- int out_index,
- struct mlx5e_encap_entry *e,
- struct netlink_ext_ack *extack)
-{
- int i;
-
- for (i = 0; i < out_index; i++) {
- if (flow->encaps[i].e != e)
- continue;
- NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
- netdev_err(priv->netdev, "can't duplicate encap action\n");
- return true;
- }
-
- return false;
-}
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct net_device *mirred_dev,
- int out_index,
- struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
- const struct ip_tunnel_info *tun_info;
- struct encap_key key;
- struct mlx5e_encap_entry *e;
- unsigned short family;
- uintptr_t hash_key;
- int err = 0;
-
- parse_attr = attr->parse_attr;
- tun_info = parse_attr->tun_info[out_index];
- family = ip_tunnel_info_af(tun_info);
- key.ip_tun_key = &tun_info->key;
- key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
- if (!key.tc_tunnel) {
- NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
- return -EOPNOTSUPP;
- }
-
- hash_key = hash_encap_info(&key);
-
- mutex_lock(&esw->offloads.encap_tbl_lock);
- e = mlx5e_encap_get(priv, &key, hash_key);
-
- /* must verify if encap is valid or not */
- if (e) {
- /* Check that entry was not already attached to this flow */
- if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
- err = -EOPNOTSUPP;
- goto out_err;
- }
-
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- wait_for_completion(&e->res_ready);
-
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
- if (e->compl_result < 0) {
- err = -EREMOTEIO;
- goto out_err;
- }
- goto attach_flow;
- }
-
- e = kzalloc(sizeof(*e), GFP_KERNEL);
- if (!e) {
- err = -ENOMEM;
- goto out_err;
- }
-
- refcount_set(&e->refcnt, 1);
- init_completion(&e->res_ready);
-
- tun_info = dup_tun_info(tun_info);
- if (!tun_info) {
- err = -ENOMEM;
- goto out_err_init;
- }
- e->tun_info = tun_info;
- err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
- if (err)
- goto out_err_init;
-
- INIT_LIST_HEAD(&e->flows);
- hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- if (family == AF_INET)
- err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
- else if (family == AF_INET6)
- err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
-
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
- complete_all(&e->res_ready);
- if (err) {
- e->compl_result = err;
- goto out_err;
- }
- e->compl_result = 1;
-
-attach_flow:
- flow->encaps[out_index].e = e;
- list_add(&flow->encaps[out_index].list, &e->flows);
- flow->encaps[out_index].index = out_index;
- *encap_dev = e->out_dev;
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
- attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
- attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
- *encap_valid = true;
- } else {
- *encap_valid = false;
- }
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- return err;
-
-out_err:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- if (e)
- mlx5e_encap_put(priv, e);
- return err;
-
-out_err_init:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- kfree(tun_info);
- kfree(e);
- return err;
-}
-
-static int mlx5e_attach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5e_decap_entry *d;
- struct mlx5e_decap_key key;
- uintptr_t hash_key;
- int err = 0;
-
- parse_attr = flow->attr->parse_attr;
- if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
- NL_SET_ERR_MSG_MOD(extack,
- "encap header larger than max supported");
- return -EOPNOTSUPP;
- }
-
- key.key = parse_attr->eth;
- hash_key = hash_decap_info(&key);
- mutex_lock(&esw->offloads.decap_tbl_lock);
- d = mlx5e_decap_get(priv, &key, hash_key);
- if (d) {
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- wait_for_completion(&d->res_ready);
- mutex_lock(&esw->offloads.decap_tbl_lock);
- if (d->compl_result) {
- err = -EREMOTEIO;
- goto out_free;
- }
- goto found;
- }
-
- d = kzalloc(sizeof(*d), GFP_KERNEL);
- if (!d) {
- err = -ENOMEM;
- goto out_err;
- }
-
- d->key = key;
- refcount_set(&d->refcnt, 1);
- init_completion(&d->res_ready);
- INIT_LIST_HEAD(&d->flows);
- hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
-
- d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
- MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
- sizeof(parse_attr->eth),
- &parse_attr->eth,
- MLX5_FLOW_NAMESPACE_FDB);
- if (IS_ERR(d->pkt_reformat)) {
- err = PTR_ERR(d->pkt_reformat);
- d->compl_result = err;
- }
- mutex_lock(&esw->offloads.decap_tbl_lock);
- complete_all(&d->res_ready);
- if (err)
- goto out_free;
-
-found:
- flow->decap_reformat = d;
- attr->decap_pkt_reformat = d->pkt_reformat;
- list_add(&flow->l3_to_l2_reformat, &d->flows);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- return 0;
-
-out_free:
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- mlx5e_decap_put(priv, d);
- return err;
-
-out_err:
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- return err;
-}
-
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
const struct flow_action_entry *act,
struct mlx5_esw_flow_attr *attr,
@@ -4249,7 +3648,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (encap) {
parse_attr->mirred_ifindex[esw_attr->out_count] =
out_dev->ifindex;
- parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
+ parse_attr->tun_info[esw_attr->out_count] =
+ mlx5e_dup_tun_info(info);
if (!parse_attr->tun_info[esw_attr->out_count])
return -ENOMEM;
encap = false;
@@ -4386,6 +3786,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
}
}
+ /* always set IP version for indirect table handling */
+ attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
+
if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
/* For prio tag mode, replace vlan pop with rewrite vlan prio
@@ -4666,7 +4069,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
return flow;
err_free:
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
return ERR_PTR(err);
@@ -4811,6 +4213,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
return 0;
err_free:
+ flow_flag_set(flow, FAILED);
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
@@ -5332,7 +4735,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
}
uplink_priv->tunnel_mapping = mapping;
- mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
+ /* 0xFFF is reserved for stack devices slow path table mark */
+ mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_enc_opts_mapping;
@@ -5345,8 +4749,14 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
+ uplink_priv->encap = mlx5e_tc_tun_init(priv);
+ if (IS_ERR(uplink_priv->encap))
+ goto err_register_fib_notifier;
+
return err;
+err_register_fib_notifier:
+ rhashtable_destroy(tc_ht);
err_ht_init:
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
@@ -5363,10 +4773,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
struct mlx5_rep_uplink_priv *uplink_priv;
- rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
-
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
+ rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+ mlx5e_tc_tun_cleanup(uplink_priv->encap);
+
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
mapping_destroy(uplink_priv->tunnel_mapping);
@@ -5466,7 +4877,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
tc_skb_ext->chain = chain;
zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
- ZONE_RESTORE_MAX;
+ ESW_ZONE_ID_MASK;
if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
zone_restore_id))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 4a2ce241522e..89003ae7775a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -37,6 +37,8 @@
#include "en.h"
#include "eswitch.h"
#include "en/tc_ct.h"
+#include "en/tc_tun.h"
+#include "en_rep.h"
#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
@@ -76,6 +78,7 @@ struct mlx5_flow_attr {
struct mlx5_flow_table *dest_ft;
u8 inner_match_level;
u8 outer_match_level;
+ u8 ip_version;
u32 flags;
union {
struct mlx5_esw_flow_attr esw_attr[0];
@@ -83,6 +86,19 @@ struct mlx5_flow_attr {
};
};
+struct mlx5_rx_tun_attr {
+ u16 decap_vport;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } src_ip; /* Valid if decap_vport is not zero */
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip; /* Valid if decap_vport is not zero */
+ u32 vni;
+};
+
#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16
#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0)
@@ -158,7 +174,7 @@ bool mlx5e_encap_take(struct mlx5e_encap_entry *e);
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e);
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list);
-void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
struct mlx5e_neigh_hash_entry;
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
@@ -167,6 +183,7 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
enum mlx5e_tc_attr_to_reg {
CHAIN_TO_REG,
+ VPORT_TO_REG,
TUNNEL_TO_REG,
CTSTATE_TO_REG,
ZONE_TO_REG,
@@ -197,6 +214,11 @@ int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
enum mlx5e_tc_attr_to_reg type,
u32 data);
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5e_tc_attr_to_reg type,
+ int act_id, u32 data);
+
void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
enum mlx5e_tc_attr_to_reg type,
u32 data,
@@ -207,6 +229,16 @@ void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
u32 *data,
u32 *mask);
+int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data);
+
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow);
+
int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
int namespace,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
@@ -242,6 +274,10 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
struct mlx5_flow_handle *rule,
struct mlx5_flow_attr *attr);
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev);
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev,
+ u16 *vport);
+
#else /* CONFIG_MLX5_CLS_ACT */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
@@ -283,7 +319,7 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
reg_b = be32_to_cpu(cqe->ft_metadata);
- if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ZONE_RESTORE_BITS))
+ if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ESW_ZONE_ID_BITS))
return false;
chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
new file mode 100644
index 000000000000..b7d00c4c7046
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "en.h"
+#include "en_tc.h"
+#include "fs_core.h"
+#include "esw/indir_table.h"
+#include "lib/fs_chains.h"
+
+#define MLX5_ESW_INDIR_TABLE_SIZE 128
+#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
+#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)
+
+struct mlx5_esw_indir_table_rule {
+ struct list_head list;
+ struct mlx5_flow_handle *handle;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip;
+ u32 vni;
+ struct mlx5_modify_hdr *mh;
+ refcount_t refcnt;
+};
+
+struct mlx5_esw_indir_table_entry {
+ struct hlist_node hlist;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *recirc_grp;
+ struct mlx5_flow_group *fwd_grp;
+ struct mlx5_flow_handle *fwd_rule;
+ struct list_head recirc_rules;
+ int recirc_cnt;
+ int fwd_ref;
+
+ u16 vport;
+ u8 ip_version;
+};
+
+struct mlx5_esw_indir_table {
+ struct mutex lock; /* protects table */
+ DECLARE_HASHTABLE(table, 8);
+};
+
+struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void)
+{
+ struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);
+
+ if (!indir)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&indir->lock);
+ hash_init(indir->table);
+ return indir;
+}
+
+void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
+{
+ mutex_destroy(&indir->lock);
+ kvfree(indir);
+}
+
+bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ /* Use indirect table for all IP traffic from UL to VF with vport
+ * destination when source rewrite flag is set.
+ */
+ return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
+ mlx5_eswitch_is_vf_vport(esw, vport_num) &&
+ esw->dev == dest_mdev &&
+ attr->ip_version &&
+ attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+}
+
+u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
+}
+
+static struct mlx5_esw_indir_table_rule *
+mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_esw_indir_table_rule *rule;
+
+ list_for_each_entry(rule, &e->recirc_rules, list)
+ if (rule->vni == attr->rx_tun_attr->vni &&
+ !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
+ sizeof(attr->rx_tun_attr->dst_ip)))
+ goto found;
+ return NULL;
+
+found:
+ refcount_inc(&rule->refcnt);
+ return rule;
+}
+
+static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_esw_indir_table_rule *rule;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *rule_spec;
+ struct mlx5_flow_handle *handle;
+ int err = 0;
+ u32 data;
+
+ rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
+ if (rule)
+ return 0;
+
+ if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
+ return -EINVAL;
+
+ rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+ if (!rule_spec)
+ return -ENOMEM;
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS |
+ MLX5_MATCH_MISC_PARAMETERS_2;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ outer_headers.ip_version, 0xf);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
+ attr->ip_version);
+ } else if (attr->ip_version) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
+ (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
+ } else {
+ err = -EOPNOTSUPP;
+ goto err_ethertype;
+ }
+
+ if (attr->ip_version == 4) {
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ MLX5_SET(fte_match_param, rule_spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
+ } else if (attr->ip_version == 6) {
+ int len = sizeof(struct in6_addr);
+
+ memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, len);
+ memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &esw_attr->rx_tun_attr->dst_ip.v6, len);
+ }
+
+ MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+ misc_parameters.vxlan_vni);
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
+ MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));
+
+ MLX5_SET(fte_match_param, rule_spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
+ MLX5_VPORT_UPLINK));
+
+ /* Modify flow source to recirculate packet */
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ VPORT_TO_REG, data);
+ if (err)
+ goto err_mod_hdr_regc0;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
+ if (err)
+ goto err_mod_hdr_regc1;
+
+ flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts.num_actions, mod_acts.actions);
+ if (IS_ERR(flow_act.modify_hdr)) {
+ err = PTR_ERR(flow_act.modify_hdr);
+ goto err_mod_hdr_alloc;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
+ if (!dest.ft) {
+ err = PTR_ERR(dest.ft);
+ goto err_table;
+ }
+ handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto err_handle;
+ }
+
+ dealloc_mod_hdr_actions(&mod_acts);
+ rule->handle = handle;
+ rule->vni = esw_attr->rx_tun_attr->vni;
+ rule->mh = flow_act.modify_hdr;
+ memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
+ sizeof(esw_attr->rx_tun_attr->dst_ip));
+ refcount_set(&rule->refcnt, 1);
+ list_add(&rule->list, &e->recirc_rules);
+ e->recirc_cnt++;
+ goto out;
+
+err_handle:
+ mlx5_chains_put_table(chains, 0, 1, 0);
+err_table:
+ mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
+err_mod_hdr_alloc:
+err_mod_hdr_regc1:
+ dealloc_mod_hdr_actions(&mod_acts);
+err_mod_hdr_regc0:
+err_ethertype:
+ kfree(rule);
+out:
+ kfree(rule_spec);
+ return err;
+}
+
+static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5_esw_indir_table_rule *rule;
+
+ list_for_each_entry(rule, &e->recirc_rules, list)
+ if (rule->vni == esw_attr->rx_tun_attr->vni &&
+ !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
+ sizeof(esw_attr->rx_tun_attr->dst_ip)))
+ goto found;
+
+ return;
+
+found:
+ if (!refcount_dec_and_test(&rule->refcnt))
+ return;
+
+ mlx5_del_flow_rules(rule->handle);
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ mlx5_modify_header_dealloc(esw->dev, rule->mh);
+ list_del(&rule->list);
+ kfree(rule);
+ e->recirc_cnt--;
+}
+
+static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ u32 *in, *match;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
+ match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
+ MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);
+
+ if (attr->ip_version == 4) {
+ MLX5_SET_TO_ONES(fte_match_param, match,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else if (attr->ip_version == 6) {
+ memset(MLX5_ADDR_OF(fte_match_param, match,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, sizeof(struct in6_addr));
+ } else {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
+ MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
+ e->recirc_grp = mlx5_create_flow_group(e->ft, in);
+ if (IS_ERR(e->recirc_grp)) {
+ err = PTR_ERR(e->recirc_grp);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&e->recirc_rules);
+ e->recirc_cnt = 0;
+
+out:
+ kfree(in);
+ return err;
+}
+
+static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_indir_table_entry *e)
+{
+ int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ kfree(in);
+ return -ENOMEM;
+ }
+
+ /* Hold one entry */
+ MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
+ e->fwd_grp = mlx5_create_flow_group(e->ft, in);
+ if (IS_ERR(e->fwd_grp)) {
+ err = PTR_ERR(e->fwd_grp);
+ goto err_out;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = e->vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(e->fwd_rule)) {
+ mlx5_destroy_flow_group(e->fwd_grp);
+ err = PTR_ERR(e->fwd_rule);
+ }
+
+err_out:
+ kfree(spec);
+ kfree(in);
+ return err;
+}
+
+static struct mlx5_esw_indir_table_entry *
+mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec, u16 vport, bool decap)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_esw_indir_table_entry *e;
+ struct mlx5_flow_table *ft;
+ int err = 0;
+
+ root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns)
+ return ERR_PTR(-ENOENT);
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+
+ ft_attr.prio = FDB_TC_OFFLOAD;
+ ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto tbl_err;
+ }
+ e->ft = ft;
+ e->vport = vport;
+ e->ip_version = attr->ip_version;
+ e->fwd_ref = !decap;
+
+ err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
+ if (err)
+ goto recirc_grp_err;
+
+ if (decap) {
+ err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
+ if (err)
+ goto recirc_rule_err;
+ }
+
+ err = mlx5_create_indir_fwd_group(esw, e);
+ if (err)
+ goto fwd_grp_err;
+
+ hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
+ vport << 16 | attr->ip_version);
+
+ return e;
+
+fwd_grp_err:
+ if (decap)
+ mlx5_esw_indir_table_rule_put(esw, attr, e);
+recirc_rule_err:
+ mlx5_destroy_flow_group(e->recirc_grp);
+recirc_grp_err:
+ mlx5_destroy_flow_table(e->ft);
+tbl_err:
+ kfree(e);
+ return ERR_PTR(err);
+}
+
+static struct mlx5_esw_indir_table_entry *
+mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
+{
+ struct mlx5_esw_indir_table_entry *e;
+ u32 key = vport << 16 | ip_version;
+
+ hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
+ if (e->vport == vport && e->ip_version == ip_version)
+ return e;
+
+ return NULL;
+}
+
+struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ u16 vport, bool decap)
+{
+ struct mlx5_esw_indir_table_entry *e;
+ int err;
+
+ mutex_lock(&esw->fdb_table.offloads.indir->lock);
+ e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
+ if (e) {
+ if (!decap) {
+ e->fwd_ref++;
+ } else {
+ err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
+ if (err)
+ goto out_err;
+ }
+ } else {
+ e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
+ if (IS_ERR(e)) {
+ err = PTR_ERR(e);
+ esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
+ goto out_err;
+ }
+ }
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+ return e->ft;
+
+out_err:
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+ return ERR_PTR(err);
+}
+
+void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap)
+{
+ struct mlx5_esw_indir_table_entry *e;
+
+ mutex_lock(&esw->fdb_table.offloads.indir->lock);
+ e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
+ if (!e)
+ goto out;
+
+ if (!decap)
+ e->fwd_ref--;
+ else
+ mlx5_esw_indir_table_rule_put(esw, attr, e);
+
+ if (e->fwd_ref || e->recirc_cnt)
+ goto out;
+
+ hash_del(&e->hlist);
+ mlx5_destroy_flow_group(e->recirc_grp);
+ mlx5_del_flow_rules(e->fwd_rule);
+ mlx5_destroy_flow_group(e->fwd_grp);
+ mlx5_destroy_flow_table(e->ft);
+ kfree(e);
+out:
+ mutex_unlock(&esw->fdb_table.offloads.indir->lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
new file mode 100644
index 000000000000..cb9eafd1b4ee
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_ESW_FT_H__
+#define __MLX5_ESW_FT_H__
+
+#ifdef CONFIG_MLX5_CLS_ACT
+
+struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void);
+void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir);
+
+struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ u16 vport, bool decap);
+void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap);
+
+bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev);
+
+u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr);
+
+#else
+/* indir API stubs */
+struct mlx5_esw_indir_table *
+mlx5_esw_indir_table_init(void)
+{
+ return NULL;
+}
+
+void
+mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
+{
+}
+
+static inline struct mlx5_flow_table *
+mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ u16 vport, bool decap)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport, bool decap)
+{
+}
+
+bool
+mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ u16 vport_num,
+ struct mlx5_core_dev *dest_mdev)
+{
+ return false;
+}
+
+static inline u16
+mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+#endif
+
+#endif /* __MLX5_ESW_FT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 820305b1664e..aba17835465b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1300,6 +1300,13 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
(!vport_num && mlx5_core_is_ecpf(esw->dev)))
vport->info.trusted = true;
+ if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
+ ret = mlx5_esw_vport_vhca_id_set(esw, vport_num);
+ if (ret)
+ goto err_vhca_mapping;
+ }
+
esw_vport_change_handle_locked(vport);
esw->enabled_vports++;
@@ -1307,6 +1314,11 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
done:
mutex_unlock(&esw->state_lock);
return ret;
+
+err_vhca_mapping:
+ esw_vport_cleanup(esw, vport);
+ mutex_unlock(&esw->state_lock);
+ return ret;
}
void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
@@ -1325,6 +1337,11 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
/* Disable events from this vport */
arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+
+ if (!mlx5_esw_is_manager_vport(esw, vport->vport) &&
+ MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ mlx5_esw_vport_vhca_id_clear(esw, vport_num);
+
/* We don't assume VFs will cleanup after themselves.
* Calling vport change handler while vport is disabled will cleanup
* the vport resources.
@@ -1815,6 +1832,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr);
atomic64_set(&esw->offloads.num_flows, 0);
ida_init(&esw->offloads.vport_metadata_ida);
+ xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
mutex_init(&esw->mode_lock);
@@ -1854,6 +1872,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_offloads_cleanup_reps(esw);
mutex_destroy(&esw->mode_lock);
mutex_destroy(&esw->state_lock);
+ WARN_ON(!xa_empty(&esw->offloads.vhca_map));
+ xa_destroy(&esw->offloads.vhca_map);
ida_destroy(&esw->offloads.vport_metadata_ida);
mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr);
mutex_destroy(&esw->offloads.encap_tbl_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 479d2ac2cd85..fdf5c8c05c1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -36,6 +36,7 @@
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
+#include <linux/xarray.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/eswitch.h>
@@ -160,6 +161,8 @@ struct mlx5_vport {
struct devlink_port *dl_port;
};
+struct mlx5_esw_indir_table;
+
struct mlx5_eswitch_fdb {
union {
struct legacy_fdb {
@@ -176,9 +179,11 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *send_to_vport_meta_grp;
struct mlx5_flow_group *peer_miss_grp;
struct mlx5_flow_handle **peer_miss_rules;
struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle **send_to_vport_meta_rules;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
@@ -190,6 +195,8 @@ struct mlx5_eswitch_fdb {
struct mutex lock;
} vports;
+ struct mlx5_esw_indir_table *indir;
+
} offloads;
};
u32 flags;
@@ -212,6 +219,7 @@ struct mlx5_esw_offload {
struct mod_hdr_tbl mod_hdr;
DECLARE_HASHTABLE(termtbl_tbl, 8);
struct mutex termtbl_mutex; /* protects termtbl hash */
+ struct xarray vhca_map;
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
atomic64_t num_flows;
@@ -387,12 +395,14 @@ enum mlx5_flow_match_level {
enum {
MLX5_ESW_DEST_ENCAP = BIT(0),
MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2),
};
enum {
MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0),
MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1),
MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2),
+ MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3),
};
struct mlx5_esw_flow_attr {
@@ -413,7 +423,9 @@ struct mlx5_esw_flow_attr {
struct mlx5_pkt_reformat *pkt_reformat;
struct mlx5_core_dev *mdev;
struct mlx5_termtbl_handle *termtbl;
+ int src_port_rewrite_act_id;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5_rx_tun_attr *rx_tun_attr;
struct mlx5_pkt_reformat *decap_pkt_reformat;
};
@@ -734,6 +746,10 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
u16 vport_num, u32 sfnum);
void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num);
+
/**
* mlx5_esw_event_info - Indicates eswitch mode changed/changing.
*
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7f09f2bbf7c1..94cb0217b4f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -38,7 +38,9 @@
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
+#include "esw/indir_table.h"
#include "esw/acl/ofld.h"
+#include "esw/indir_table.h"
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
@@ -257,7 +259,9 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
static void
mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr,
+ struct mlx5_eswitch *src_esw,
+ u16 vport)
{
void *misc2;
void *misc;
@@ -266,10 +270,12 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
* VHCA in dual-port RoCE mode, and matching on source vport may fail.
*/
if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ vport = mlx5_esw_indir_table_decap_vport(attr);
misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
- attr->in_rep->vport));
+ mlx5_eswitch_get_vport_metadata_for_match(src_esw,
+ vport));
misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
@@ -278,12 +284,12 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
} else {
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
MLX5_SET(fte_match_set_misc, misc,
source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+ MLX5_CAP_GEN(src_esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
@@ -295,6 +301,299 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
}
}
+static int
+esw_setup_decap_indir(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_flow_table *ft;
+
+ if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ ft = mlx5_esw_indir_table_get(esw, attr, spec,
+ mlx5_esw_indir_table_decap_vport(attr), true);
+ return PTR_ERR_OR_ZERO(ft);
+}
+
+static void
+esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ mlx5_esw_indir_table_put(esw, attr,
+ mlx5_esw_indir_table_decap_vport(attr),
+ true);
+}
+
+static int
+esw_setup_ft_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ int i)
+{
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = attr->dest_ft;
+
+ if (mlx5_esw_indir_table_decap_vport(attr))
+ return esw_setup_decap_indir(esw, attr, spec);
+ return 0;
+}
+
+static void
+esw_setup_slow_path_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_fs_chains *chains,
+ int i)
+{
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
+}
+
+static int
+esw_setup_chain_dest(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level,
+ int i)
+{
+ struct mlx5_flow_table *ft;
+
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ ft = mlx5_chains_get_table(chains, chain, prio, level);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = ft;
+ return 0;
+}
+
+static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
+ int from, int to)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ int i;
+
+ for (i = from; i < to; i++)
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
+ esw_attr->dests[i].mdev))
+ mlx5_esw_indir_table_put(esw, attr, esw_attr->dests[i].rep->vport,
+ false);
+}
+
+static bool
+esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr)
+{
+ int i;
+
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ return true;
+ return false;
+}
+
+static int
+esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_fs_chains *chains,
+ struct mlx5_flow_attr *attr,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int j, err;
+
+ if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
+ if (err)
+ goto err_setup_chain;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat;
+ }
+ return 0;
+
+err_setup_chain:
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
+ return err;
+}
+
+static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
+}
+
+static bool
+esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int i;
+
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+ if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
+ esw_attr->dests[i].mdev))
+ return true;
+ return false;
+}
+
+static int
+esw_setup_indir_table(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ bool ignore_flow_lvl,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int j, err;
+
+ if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE))
+ return -EOPNOTSUPP;
+
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
+ if (ignore_flow_lvl)
+ flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, spec,
+ esw_attr->dests[j].rep->vport, false);
+ if (IS_ERR(dest[*i].ft)) {
+ err = PTR_ERR(dest[*i].ft);
+ goto err_indir_tbl_get;
+ }
+ }
+
+ if (mlx5_esw_indir_table_decap_vport(attr)) {
+ err = esw_setup_decap_indir(esw, attr, spec);
+ if (err)
+ goto err_indir_tbl_get;
+ }
+
+ return 0;
+
+err_indir_tbl_get:
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
+ return err;
+}
+
+static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+
+ esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
+ esw_cleanup_decap_indir(esw, attr);
+}
+
+static void
+esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level)
+{
+ mlx5_chains_put_table(chains, chain, prio, level);
+}
+
+static void
+esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
+ int attr_idx, int dest_idx, bool pkt_reformat)
+{
+ dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
+ dest[dest_idx].vport.vhca_id =
+ MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
+ if (pkt_reformat) {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
+ }
+ dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
+ }
+}
+
+static int
+esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
+ int i)
+{
+ int j;
+
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++)
+ esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true);
+ return i;
+}
+
+static int
+esw_setup_dests(struct mlx5_flow_destination *dest,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_spec *spec,
+ int *i)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ int err = 0;
+
+ if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
+ MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw))
+ attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
+
+ if (attr->dest_ft) {
+ esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
+ (*i)++;
+ } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
+ esw_setup_slow_path_dest(dest, flow_act, chains, *i);
+ (*i)++;
+ } else if (attr->dest_chain) {
+ err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
+ 1, 0, *i);
+ (*i)++;
+ } else if (esw_is_indir_table(esw, attr)) {
+ err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i);
+ } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
+ err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
+ } else {
+ *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i);
+ }
+
+ return err;
+}
+
+static void
+esw_cleanup_dests(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+
+ if (attr->dest_ft) {
+ esw_cleanup_decap_indir(esw, attr);
+ } else if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) {
+ if (attr->dest_chain)
+ esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
+ else if (esw_is_indir_table(esw, attr))
+ esw_cleanup_indir_table(esw, attr);
+ else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
+ esw_cleanup_chain_src_port_rewrite(esw, attr);
+ }
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
@@ -308,7 +607,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_vport_tbl_attr fwd_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
- int j, i = 0;
+ int i = 0;
if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
@@ -329,50 +628,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
}
}
+ mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
- struct mlx5_flow_table *ft;
-
- if (attr->dest_ft) {
- flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = attr->dest_ft;
- i++;
- } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
- flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
- i++;
- } else if (attr->dest_chain) {
- flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
- ft = mlx5_chains_get_table(chains, attr->dest_chain,
- 1, 0);
- if (IS_ERR(ft)) {
- rule = ERR_CAST(ft);
- goto err_create_goto_table;
- }
-
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = ft;
- i++;
- } else {
- for (j = esw_attr->split_count; j < esw_attr->out_count; j++) {
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = esw_attr->dests[j].rep->vport;
- dest[i].vport.vhca_id =
- MLX5_CAP_GEN(esw_attr->dests[j].mdev, vhca_id);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- dest[i].vport.flags |=
- MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (esw_attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- flow_act.pkt_reformat =
- esw_attr->dests[j].pkt_reformat;
- dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
- dest[i].vport.pkt_reformat =
- esw_attr->dests[j].pkt_reformat;
- }
- i++;
- }
+ int err;
+
+ err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_create_goto_table;
}
}
@@ -407,15 +671,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
fdb = attr->ft;
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT))
- mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr);
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr,
+ esw_attr->in_mdev->priv.eswitch,
+ esw_attr->in_rep->vport);
}
if (IS_ERR(fdb)) {
rule = ERR_CAST(fdb);
goto err_esw_get;
}
- mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
-
if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
&flow_act, dest, i);
@@ -434,8 +698,7 @@ err_add_rule:
else if (attr->chain || attr->prio)
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
err_esw_get:
- if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain)
- mlx5_chains_put_table(chains, attr->dest_chain, 1, 0);
+ esw_cleanup_dests(esw, attr);
err_create_goto_table:
return rule;
}
@@ -453,7 +716,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
- int i;
+ int i, err = 0;
fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
if (IS_ERR(fast_fdb)) {
@@ -472,22 +735,26 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
for (i = 0; i < esw_attr->split_count; i++) {
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = esw_attr->dests[i].rep->vport;
- dest[i].vport.vhca_id =
- MLX5_CAP_GEN(esw_attr->dests[i].mdev, vhca_id);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
- dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
- dest[i].vport.pkt_reformat = esw_attr->dests[i].pkt_reformat;
+ if (esw_is_indir_table(esw, attr))
+ err = esw_setup_indir_table(dest, &flow_act, esw, attr, spec, false, &i);
+ else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
+ err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr,
+ &i);
+ else
+ esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
+
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_chain_src_rewrite;
}
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[i].ft = fwd_fdb;
i++;
- mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr);
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr,
+ esw_attr->in_mdev->priv.eswitch,
+ esw_attr->in_rep->vport);
if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -495,13 +762,16 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
- if (IS_ERR(rule))
- goto add_err;
+ if (IS_ERR(rule)) {
+ i = esw_attr->split_count;
+ goto err_chain_src_rewrite;
+ }
atomic64_inc(&esw->offloads.num_flows);
return rule;
-add_err:
+err_chain_src_rewrite:
+ esw_put_dest_tables_loop(esw, attr, 0, i);
esw_vport_tbl_put(esw, &fwd_attr);
err_get_fwd:
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
@@ -542,13 +812,13 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
if (fwd_rule) {
esw_vport_tbl_put(esw, &fwd_attr);
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+ esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
} else {
if (split)
esw_vport_tbl_put(esw, &fwd_attr);
else if (attr->chain || attr->prio)
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
- if (attr->dest_chain)
- mlx5_chains_put_table(chains, attr->dest_chain, 1, 0);
+ esw_cleanup_dests(esw, attr);
}
}
@@ -810,6 +1080,81 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
+static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;
+ int i = 0, num_vfs = esw->esw_funcs.num_vfs, vport_num;
+
+ if (!num_vfs || !flows)
+ return;
+
+ mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs)
+ mlx5_del_flow_rules(flows[i++]);
+
+ kvfree(flows);
+}
+
+static int
+mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
+{
+ int num_vfs, vport_num, rule_idx = 0, err = 0;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_handle **flows;
+ struct mlx5_flow_spec *spec;
+
+ num_vfs = esw->esw_funcs.num_vfs;
+ flows = kvzalloc(num_vfs * sizeof(*flows), GFP_KERNEL);
+ if (!flows)
+ return -ENOMEM;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+
+ MLX5_SET(fte_match_param, spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, spec->match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1,
+ ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) {
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+ dest.vport.num = vport_num;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule idx %d, err %ld\n",
+ rule_idx, PTR_ERR(flow_rule));
+ goto rule_err;
+ }
+ flows[rule_idx++] = flow_rule;
+ }
+
+ esw->fdb_table.offloads.send_to_vport_meta_rules = flows;
+ kvfree(spec);
+ return 0;
+
+rule_err:
+ while (--rule_idx >= 0)
+ mlx5_del_flow_rules(flows[rule_idx]);
+ kvfree(spec);
+alloc_err:
+ kvfree(flows);
+ return err;
+}
+
static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
{
return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
@@ -1292,11 +1637,11 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table_attr ft_attr = {};
+ int num_vfs, table_size, ix, err = 0;
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
u32 flags = 0, *flow_group_in;
- int table_size, ix, err = 0;
struct mlx5_flow_group *g;
void *match_criteria;
u8 *dmac;
@@ -1322,7 +1667,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
}
table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
- MLX5_ESW_MISS_FLOWS + esw->total_vports;
+ MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
/* create the slow path fdb with encap set, so further table instances
* can be created at run time while VFs are probed if the FW allows that.
@@ -1370,6 +1715,38 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
}
esw->fdb_table.offloads.send_to_vport_grp = g;
+ /* meta send to vport */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+
+ num_vfs = esw->esw_funcs.num_vfs;
+ if (num_vfs) {
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1);
+ ix += num_vfs;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n",
+ err);
+ goto send_vport_meta_err;
+ }
+ esw->fdb_table.offloads.send_to_vport_meta_grp = g;
+
+ err = mlx5_eswitch_add_send_to_vport_meta_rules(esw);
+ if (err)
+ goto meta_rule_err;
+ }
+
if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
/* create peer esw miss group */
memset(flow_group_in, 0, inlen);
@@ -1437,6 +1814,11 @@ miss_err:
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
peer_miss_err:
+ mlx5_eswitch_del_send_to_vport_meta_rules(esw);
+meta_rule_err:
+ if (esw->fdb_table.offloads.send_to_vport_meta_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
+send_vport_meta_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
esw_chains_destroy(esw, esw_chains(esw));
@@ -1458,7 +1840,10 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ mlx5_eswitch_del_send_to_vport_meta_rules(esw);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ if (esw->fdb_table.offloads.send_to_vport_meta_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
@@ -2182,12 +2567,20 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
+ struct mlx5_esw_indir_table *indir;
int err;
memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.vports.lock);
hash_init(esw->fdb_table.offloads.vports.table);
+ indir = mlx5_esw_indir_table_init();
+ if (IS_ERR(indir)) {
+ err = PTR_ERR(indir);
+ goto create_indir_err;
+ }
+ esw->fdb_table.offloads.indir = indir;
+
err = esw_create_uplink_offloads_acl_tables(esw);
if (err)
goto create_acl_err;
@@ -2219,6 +2612,8 @@ create_restore_err:
create_offloads_err:
esw_destroy_uplink_offloads_acl_tables(esw);
create_acl_err:
+ mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
+create_indir_err:
mutex_destroy(&esw->fdb_table.offloads.vports.lock);
return err;
}
@@ -2230,6 +2625,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
esw_destroy_restore_table(esw);
esw_destroy_offloads_table(esw);
esw_destroy_uplink_offloads_acl_tables(esw);
+ mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
mutex_destroy(&esw->fdb_table.offloads.vports.lock);
}
@@ -2867,3 +3263,94 @@ void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num);
}
+
+static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ *vhca_id = 0;
+ if (mlx5_esw_is_manager_vport(esw, vport_num) ||
+ !MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ return -EPERM;
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
+int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ u16 *old_entry, *vhca_map_entry, vhca_id;
+ int err;
+
+ err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ if (err) {
+ esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL);
+ if (!vhca_map_entry)
+ return -ENOMEM;
+
+ *vhca_map_entry = vport_num;
+ old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL);
+ if (xa_is_err(old_entry)) {
+ kfree(vhca_map_entry);
+ return xa_err(old_entry);
+ }
+ kfree(old_entry);
+ return 0;
+}
+
+void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ u16 *vhca_map_entry, vhca_id;
+ int err;
+
+ err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
+ if (err)
+ esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n",
+ vport_num, err);
+
+ vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id);
+ kfree(vhca_map_entry);
+}
+
+int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num)
+{
+ u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id);
+
+ if (!res)
+ return -ENOENT;
+
+ *vport_num = *res;
+ return 0;
+}
+
+u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (WARN_ON_ONCE(IS_ERR(vport)))
+ return 0;
+
+ return vport->metadata;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 3754ef98554f..efe403c7e354 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -270,5 +270,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup);
int mlx5_load_one(struct mlx5_core_dev *dev, bool boot);
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
+
void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index ba78e0660523..e05c5c0f3ae1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1164,3 +1164,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev);
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
+{
+ u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ MLX5_SET(query_hca_cap_in, in, function_id, function_id);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 29fd832950e0..994c2c8cb4fd 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -96,6 +96,35 @@ static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
u16 vport_num);
+u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
+ u16 vport_num);
+
+/* Reg C1 usage:
+ * Reg C1 = < ESW_TUN_ID(12) | ESW_TUN_OPTS(12) | ESW_ZONE_ID(8) >
+ *
+ * Highest 12 bits of reg c1 is the encapsulation tunnel id, next 12 bits is
+ * encapsulation tunnel options, and the lowest 8 bits are used for zone id.
+ *
+ * Zone id is used to restore CT flow when packet misses on chain.
+ *
+ * Tunnel id and options are used together to restore the tunnel info metadata
+ * on miss and to support inner header rewrite by means of implicit chain 0
+ * flows.
+ */
+#define ESW_ZONE_ID_BITS 8
+#define ESW_TUN_OPTS_BITS 12
+#define ESW_TUN_ID_BITS 12
+#define ESW_TUN_OPTS_OFFSET ESW_ZONE_ID_BITS
+#define ESW_TUN_OFFSET ESW_TUN_OPTS_OFFSET
+#define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0)
+#define ESW_TUN_OPTS_MASK GENMASK(32 - ESW_TUN_ID_BITS - 1, ESW_TUN_OPTS_OFFSET)
+#define ESW_TUN_MASK GENMASK(31, ESW_TUN_OFFSET)
+#define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */
+#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT 0xFFF /* 0xFFF is a reserved mapping */
+#define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \
+ ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT)
+#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
+
u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
#else /* CONFIG_MLX5_ESWITCH */