summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c212
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c540
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h93
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h88
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c)112
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c182
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c110
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c278
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c110
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c920
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h97
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c663
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c944
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c557
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c463
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c911
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h93
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c183
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h3
91 files changed, 5365 insertions, 2945 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 10e6886c96ba..2d477f9a8cb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
- diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o
+ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o
#
# Netdev basic
@@ -24,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
- en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
+ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o
#
@@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \
en_rep.o en/rep/bond.o en/mod_hdr.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
- en/mapping.o esw/chains.o en/tc_tun.o \
+ en/mapping.o lib/fs_chains.o en/tc_tun.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
@@ -49,7 +49,8 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
ecpf.o rdma.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
- esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
+ esw/devlink_port.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c
index 2f13a250aab3..d6667d38e1de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIBt
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 8db4b5f0f963..291e427e9e4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -56,8 +56,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
size_t size, dma_addr_t *dma_handle,
int node)
{
+ struct device *device = mlx5_core_dma_dev(dev);
struct mlx5_priv *priv = &dev->priv;
- struct device *device = dev->device;
int original_node;
void *cpu_handle;
@@ -111,7 +111,7 @@ EXPORT_SYMBOL(mlx5_buf_alloc);
void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
- dma_free_coherent(dev->device, buf->size, buf->frags->buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), buf->size, buf->frags->buf,
buf->frags->map);
kfree(buf->frags);
@@ -140,7 +140,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
if (!frag->buf)
goto err_free_buf;
if (frag->map & ((1 << buf->page_shift) - 1)) {
- dma_free_coherent(dev->device, frag_sz,
+ dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz,
buf->frags[i].buf, buf->frags[i].map);
mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
&frag->map, buf->page_shift);
@@ -153,7 +153,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
err_free_buf:
while (i--)
- dma_free_coherent(dev->device, PAGE_SIZE, buf->frags[i].buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf,
buf->frags[i].map);
kfree(buf->frags);
err_out:
@@ -169,7 +169,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
for (i = 0; i < buf->npages; i++) {
int frag_sz = min_t(int, size, PAGE_SIZE);
- dma_free_coherent(dev->device, frag_sz, buf->frags[i].buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf,
buf->frags[i].map);
size -= frag_sz;
}
@@ -275,7 +275,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
__set_bit(db->index, db->u.pgdir->bitmap);
if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) {
- dma_free_coherent(dev->device, PAGE_SIZE,
+ dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
bitmap_free(db->u.pgdir->bitmap);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1d91a0d0ab1d..e49387dbef98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -69,12 +69,10 @@ enum {
MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
};
-static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
- struct mlx5_cmd_msg *in,
- struct mlx5_cmd_msg *out,
- void *uout, int uout_size,
- mlx5_cmd_cbk_t cbk,
- void *context, int page_queue)
+static struct mlx5_cmd_work_ent *
+cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t cbk, void *context, int page_queue)
{
gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
struct mlx5_cmd_work_ent *ent;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
if (!ent)
return ERR_PTR(-ENOMEM);
+ ent->idx = -EINVAL;
ent->in = in;
ent->out = out;
ent->uout = uout;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
ent->context = context;
ent->cmd = cmd;
ent->page_queue = page_queue;
+ refcount_set(&ent->refcnt, 1);
return ent;
}
+static void cmd_free_ent(struct mlx5_cmd_work_ent *ent)
+{
+ kfree(ent);
+}
+
static u8 alloc_token(struct mlx5_cmd *cmd)
{
u8 token;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
return token;
}
-static int alloc_ent(struct mlx5_cmd *cmd)
+static int cmd_alloc_index(struct mlx5_cmd *cmd)
{
unsigned long flags;
int ret;
@@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd)
return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
}
-static void free_ent(struct mlx5_cmd *cmd, int idx)
+static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
unsigned long flags;
@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}
+static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
+{
+ refcount_inc(&ent->refcnt);
+}
+
+static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
+{
+ if (!refcount_dec_and_test(&ent->refcnt))
+ return;
+
+ if (ent->idx >= 0)
+ cmd_free_index(ent->cmd, ent->idx);
+
+ cmd_free_ent(ent);
+}
+
static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
{
return cmd->cmd_buf + (idx << cmd->log_stride);
@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
ent->ret = -ETIMEDOUT;
}
-static void free_cmd(struct mlx5_cmd_work_ent *ent)
-{
- kfree(ent);
-}
-
static int verify_signature(struct mlx5_cmd_work_ent *ent)
{
struct mlx5_cmd_mailbox *next = ent->out->next;
@@ -837,11 +853,22 @@ static void cb_timeout_handler(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
cmd);
+ mlx5_cmd_eq_recover(dev);
+
+ /* Maybe got handled by eq recover ? */
+ if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx,
+ mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+ goto out; /* phew, already handled */
+ }
+
ent->ret = -ETIMEDOUT;
- mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
- mlx5_command_str(msg_to_opcode(ent->in)),
- msg_to_opcode(ent->in));
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
+ ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+
+out:
+ cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
@@ -856,6 +883,32 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
return cmd->allowed_opcode == opcode;
}
+static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
+{
+ unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
+ int idx;
+
+retry:
+ idx = cmd_alloc_index(cmd);
+ if (idx < 0 && time_before(jiffies, alloc_end)) {
+ /* Index allocation can fail on heavy load of commands. This is a temporary
+ * situation as the current command already holds the semaphore, meaning that
+ * another command completion is being handled and it is expected to release
+ * the entry index soon.
+ */
+ cpu_relax();
+ goto retry;
+ }
+ return idx;
+}
+
+bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
+{
+ return pci_channel_offline(dev->pdev) ||
+ dev->cmd.state != MLX5_CMDIF_STATE_UP ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
+}
+
static void cmd_work_handler(struct work_struct *work)
{
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -873,14 +926,14 @@ static void cmd_work_handler(struct work_struct *work)
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
- alloc_ret = alloc_ent(cmd);
+ alloc_ret = cmd_alloc_index_retry(cmd);
if (alloc_ret < 0) {
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
if (ent->callback) {
ent->callback(-EAGAIN, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
- free_cmd(ent);
+ cmd_ent_put(ent);
} else {
ent->ret = -EAGAIN;
complete(&ent->done);
@@ -916,15 +969,12 @@ static void cmd_work_handler(struct work_struct *work)
ent->ts1 = ktime_get_ns();
cmd_mode = cmd->mode;
- if (ent->callback)
- schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+ if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
+ cmd_ent_get(ent);
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
/* Skip sending command to fw if internal error */
- if (pci_channel_offline(dev->pdev) ||
- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
- cmd->state != MLX5_CMDIF_STATE_UP ||
- !opcode_allowed(&dev->cmd, ent->op)) {
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
u8 status = 0;
u32 drv_synd;
@@ -933,13 +983,10 @@ static void cmd_work_handler(struct work_struct *work)
MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
- /* no doorbell, no need to keep the entry */
- free_ent(cmd, ent->idx);
- if (ent->callback)
- free_cmd(ent);
return;
}
+ cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
@@ -983,6 +1030,35 @@ static const char *deliv_status_to_str(u8 status)
}
}
+enum {
+ MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000,
+};
+
+static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);
+
+ mlx5_cmd_eq_recover(dev);
+
+ /* Re-wait on the ent->done after executing the recovery flow. If the
+ * recovery flow (or any other recovery flow running simultaneously)
+ * has recovered an EQE, it should cause the entry to be completed by
+ * the command interface.
+ */
+ if (wait_for_completion_timeout(&ent->done, timeout)) {
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx,
+ mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+ return;
+ }
+
+ mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
+ mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+
+ ent->ret = -ETIMEDOUT;
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+}
+
static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
{
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -994,12 +1070,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
ent->ret = -ECANCELED;
goto out_err;
}
- if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
+ if (cmd->mode == CMD_MODE_POLLING || ent->polling)
wait_for_completion(&ent->done);
- } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
- ent->ret = -ETIMEDOUT;
- mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
- }
+ else if (!wait_for_completion_timeout(&ent->done, timeout))
+ wait_func_handle_exec_timeout(dev, ent);
out_err:
err = ent->ret;
@@ -1039,11 +1113,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (callback && page_queue)
return -EINVAL;
- ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
- page_queue);
+ ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
+ callback, context, page_queue);
if (IS_ERR(ent))
return PTR_ERR(ent);
+ /* put for this ent is when consumed, depending on the use case
+ * 1) (!callback) blocking flow: by caller after wait_func completes
+ * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled
+ */
+
ent->token = token;
ent->polling = force_polling;
@@ -1062,12 +1141,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
}
if (callback)
- goto out;
+ goto out; /* mlx5_cmd_comp_handler() will put(ent) */
err = wait_func(dev, ent);
- if (err == -ETIMEDOUT)
- goto out;
- if (err == -ECANCELED)
+ if (err == -ETIMEDOUT || err == -ECANCELED)
goto out_free;
ds = ent->ts2 - ent->ts1;
@@ -1085,7 +1162,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
*status = ent->status;
out_free:
- free_cmd(ent);
+ cmd_ent_put(ent);
out:
return err;
}
@@ -1516,14 +1593,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
if (!forced) {
mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
ent->idx);
- free_ent(cmd, ent->idx);
- free_cmd(ent);
+ cmd_ent_put(ent);
}
continue;
}
- if (ent->callback)
- cancel_delayed_work(&ent->cb_timeout_work);
+ if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work))
+ cmd_ent_put(ent); /* timeout work was canceled */
+
+ if (!forced || /* Real FW completion */
+ pci_channel_offline(dev->pdev) || /* FW is inaccessible */
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ cmd_ent_put(ent);
+
if (ent->page_queue)
sem = &cmd->pages_sem;
else
@@ -1545,10 +1627,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
ent->ret, deliv_status_to_str(ent->status), ent->status);
}
- /* only real completion will free the entry slot */
- if (!forced)
- free_ent(cmd, ent->idx);
-
if (ent->callback) {
ds = ent->ts2 - ent->ts1;
if (ent->op < MLX5_CMD_OP_MAX) {
@@ -1576,10 +1654,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
free_msg(dev, ent->in);
err = err ? err : ent->status;
- if (!forced)
- free_cmd(ent);
+ /* final consumer is done, release ent */
+ cmd_ent_put(ent);
callback(err, context);
} else {
+ /* release wait_func() so mlx5_cmd_invoke()
+ * can make the final ent_put()
+ */
complete(&ent->done);
}
up(sem);
@@ -1589,8 +1670,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ unsigned long bitmask;
unsigned long flags;
u64 vector;
+ int i;
/* wait for pending handlers to complete */
mlx5_eq_synchronize_cmd_irq(dev);
@@ -1599,11 +1683,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
if (!vector)
goto no_trig;
+ bitmask = vector;
+ /* we must increment the allocated entries refcount before triggering the completions
+ * to guarantee pending commands will not get freed in the meanwhile.
+ * For that reason, it also has to be done inside the alloc_lock.
+ */
+ for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+ cmd_ent_get(cmd->ent_arr[i]);
vector |= MLX5_TRIGGERED_CMD_COMP;
spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
mlx5_cmd_comp_handler(dev, vector, true);
+ for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+ cmd_ent_put(cmd->ent_arr[i]);
return;
no_trig:
@@ -1711,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
u8 token;
opcode = MLX5_GET(mbox_in, in, opcode);
- if (pci_channel_offline(dev->pdev) ||
- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
- dev->cmd.state != MLX5_CMDIF_STATE_UP ||
- !opcode_allowed(&dev->cmd, opcode)) {
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) {
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
MLX5_SET(mbox_out, out, status, status);
MLX5_SET(mbox_out, out, syndrome, drv_synd);
@@ -1899,9 +1989,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev)
static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
{
- struct device *ddev = dev->device;
-
- cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
+ cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE,
&cmd->alloc_dma, GFP_KERNEL);
if (!cmd->cmd_alloc_buf)
return -ENOMEM;
@@ -1914,9 +2002,9 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
return 0;
}
- dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
cmd->alloc_dma);
- cmd->cmd_alloc_buf = dma_alloc_coherent(ddev,
+ cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev),
2 * MLX5_ADAPTER_PAGE_SIZE - 1,
&cmd->alloc_dma, GFP_KERNEL);
if (!cmd->cmd_alloc_buf)
@@ -1930,9 +2018,7 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
{
- struct device *ddev = dev->device;
-
- dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf,
+ dma_free_coherent(mlx5_core_dma_dev(dev), cmd->alloc_size, cmd->cmd_alloc_buf,
cmd->alloc_dma);
}
@@ -1964,7 +2050,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
if (!cmd->stats)
return -ENOMEM;
- cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0);
+ cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
if (!cmd->pool) {
err = -ENOMEM;
goto dma_pool_err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 8379b24cb838..df3e4938ecdd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -42,11 +42,11 @@
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
-void mlx5_cq_tasklet_cb(unsigned long data)
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t)
{
unsigned long flags;
unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
- struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
+ struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task);
struct mlx5_core_cq *mcq;
struct mlx5_core_cq *temp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index c709e9a385f6..a28f95df2901 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -4,22 +4,19 @@
#include <devlink.h>
#include "mlx5_core.h"
+#include "fw_reset.h"
#include "fs_core.h"
#include "eswitch.h"
static int mlx5_devlink_flash_update(struct devlink *devlink,
- const char *file_name,
- const char *component,
+ struct devlink_flash_update_params *params,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
const struct firmware *fw;
int err;
- if (component)
- return -EOPNOTSUPP;
-
- err = request_firmware_direct(&fw, file_name, &dev->pdev->dev);
+ err = request_firmware_direct(&fw, params->file_name, &dev->pdev->dev);
if (err)
return err;
@@ -88,21 +85,96 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
return 0;
}
+static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 reset_level, reset_type, net_port_alive;
+ int err;
+
+ err = mlx5_fw_reset_query(dev, &reset_level, &reset_type);
+ if (err)
+ return err;
+ if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) {
+ NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot");
+ return -EINVAL;
+ }
+
+ net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE);
+ err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive);
+ if (err)
+ goto out;
+
+ err = mlx5_fw_reset_wait_reset_done(dev);
+out:
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "FW activate command failed");
+ return err;
+}
+
+static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u8 reset_level;
+ int err;
+
+ err = mlx5_fw_reset_query(dev, &reset_level, NULL);
+ if (err)
+ return err;
+ if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL0)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "FW upgrade to the stored FW can't be done by FW live patching");
+ return -EINVAL;
+ }
+
+ return mlx5_fw_reset_set_live_patch(dev);
+}
+
static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- mlx5_unload_one(dev, false);
- return 0;
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ mlx5_unload_one(dev, false);
+ return 0;
+ case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+ if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+ return mlx5_devlink_trigger_fw_live_patch(devlink, extack);
+ return mlx5_devlink_reload_fw_activate(devlink, extack);
+ default:
+ /* Unsupported action should not get to this function */
+ WARN_ON(1);
+ return -EOPNOTSUPP;
+ }
}
-static int mlx5_devlink_reload_up(struct devlink *devlink,
+static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+ enum devlink_reload_limit limit, u32 *actions_performed,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- return mlx5_load_one(dev, false);
+ *actions_performed = BIT(action);
+ switch (action) {
+ case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+ return mlx5_load_one(dev, false);
+ case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+ if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
+ break;
+ /* On fw_activate action, also driver is reloaded and reinit performed */
+ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
+ return mlx5_load_one(dev, false);
+ default:
+ /* Unsupported action should not get to this function */
+ WARN_ON(1);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
static const struct devlink_ops mlx5_devlink_ops = {
@@ -118,6 +190,9 @@ static const struct devlink_ops mlx5_devlink_ops = {
#endif
.flash_update = mlx5_devlink_flash_update,
.info_get = mlx5_devlink_info_get,
+ .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+ BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
+ .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET),
.reload_down = mlx5_devlink_reload_down,
.reload_up = mlx5_devlink_reload_up,
};
@@ -228,6 +303,24 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id
}
#endif
+static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ mlx5_fw_reset_enable_remote_dev_reset_set(dev, ctx->val.vbool);
+ return 0;
+}
+
+static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ ctx->val.vbool = mlx5_fw_reset_enable_remote_dev_reset_get(dev);
+ return 0;
+}
+
static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
"flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
@@ -243,6 +336,9 @@ static const struct devlink_param mlx5_devlink_params[] = {
NULL, NULL,
mlx5_devlink_large_group_num_validate),
#endif
+ DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_enable_remote_dev_reset_get,
+ mlx5_devlink_enable_remote_dev_reset_set, NULL),
};
static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index ad3594c4afcb..2eb022ad7fd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -124,7 +124,7 @@ static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev = tracer->dev;
- struct device *ddev = &dev->pdev->dev;
+ struct device *ddev;
dma_addr_t dma;
void *buff;
gfp_t gfp;
@@ -142,6 +142,7 @@ static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
}
tracer->buff.log_buf = buff;
+ ddev = mlx5_core_dma_dev(dev);
dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
if (dma_mapping_error(ddev, dma)) {
mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
@@ -162,11 +163,12 @@ free_pages:
static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev = tracer->dev;
- struct device *ddev = &dev->pdev->dev;
+ struct device *ddev;
if (!tracer->buff.log_buf)
return;
+ ddev = mlx5_core_dma_dev(dev);
dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
}
@@ -1064,6 +1066,58 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
kvfree(tracer);
}
+static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ cancel_work_sync(&tracer->read_fw_strings_work);
+ mlx5_fw_tracer_clean_ready_list(tracer);
+ mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
+ mlx5_fw_tracer_free_strings_db(tracer);
+
+ dev = tracer->dev;
+ err = mlx5_query_mtrc_caps(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+ return err;
+ }
+
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err);
+ return err;
+ }
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
+
+ return 0;
+}
+
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ if (IS_ERR_OR_NULL(tracer))
+ return -EINVAL;
+
+ dev = tracer->dev;
+ mlx5_fw_tracer_cleanup(tracer);
+ err = mlx5_fw_tracer_recreate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n");
+ return err;
+ }
+ err = mlx5_fw_tracer_init(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n");
+ return err;
+ }
+
+ return 0;
+}
+
static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
{
struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 40601fba80ba..97252a85d65e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -191,5 +191,6 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
struct devlink_fmsg *fmsg);
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
index 4924a5658853..ed4fb79b4db7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
@@ -78,7 +78,7 @@ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump
struct page *page)
{
struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump;
- struct device *ddev = &dev->pdev->dev;
+ struct device *ddev = mlx5_core_dma_dev(dev);
u32 out_seq_num;
u32 in_seq_num;
dma_addr_t dma;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index a894ea98c95a..3dc9dd3f24dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -43,19 +43,13 @@ static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
int mlx5_ec_init(struct mlx5_core_dev *dev)
{
- int err = 0;
-
if (!mlx5_core_is_ecpf(dev))
return 0;
/* ECPF shall enable HCA for peer PF in the same way a PF
* does this for its VFs.
*/
- err = mlx5_peer_pf_init(dev);
- if (err)
- return err;
-
- return 0;
+ return mlx5_peer_pf_init(dev);
}
void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0cc2080fd847..2f05b0f9de01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -91,7 +91,12 @@ struct page_pool;
#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
+ * WQEs, This page will absorb write overflow by the hardware, when
+ * receiving packets larger than MTU. These oversize packets are
+ * dropped by the driver at a later stage.
+ */
+#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8))
#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS))
#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS)
#define MLX5E_MAX_RQ_NUM_MTTS \
@@ -221,6 +226,7 @@ enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_STRIDING_RQ,
MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5E_PFLAG_SKB_TX_MPWQE,
MLX5E_NUM_PFLAGS, /* Keep last */
};
@@ -265,6 +271,7 @@ enum {
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */
+ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */
};
struct mlx5e_cq {
@@ -304,6 +311,7 @@ struct mlx5e_sq_dma {
enum {
MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_MPWQE,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
@@ -312,26 +320,40 @@ enum {
MLX5E_SQ_STATE_PENDING_XSK_TX,
};
+struct mlx5e_tx_mpwqe {
+ /* Current MPWQE session */
+ struct mlx5e_tx_wqe *wqe;
+ u32 bytes_count;
+ u8 ds_count;
+ u8 pkt_count;
+ u8 inline_on;
+};
+
struct mlx5e_txqsq {
/* data path */
/* dirtied @completion */
u16 cc;
+ u16 skb_fifo_cc;
u32 dma_fifo_cc;
struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
+ u16 skb_fifo_pc;
u32 dma_fifo_pc;
+ struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
struct mlx5_wq_cyc wq;
u32 dma_fifo_mask;
+ u16 skb_fifo_mask;
struct mlx5e_sq_stats *stats;
struct {
struct mlx5e_sq_dma *dma_fifo;
+ struct sk_buff **skb_fifo;
struct mlx5e_tx_wqe_info *wqe_info;
} db;
void __iomem *uar_map;
@@ -398,7 +420,7 @@ struct mlx5e_xdp_info {
};
};
-struct mlx5e_xdp_xmit_data {
+struct mlx5e_xmit_data {
dma_addr_t dma_addr;
void *data;
u32 len;
@@ -411,18 +433,10 @@ struct mlx5e_xdp_info_fifo {
u32 mask;
};
-struct mlx5e_xdp_mpwqe {
- /* Current MPWQE session */
- struct mlx5e_tx_wqe *wqe;
- u8 ds_count;
- u8 pkt_count;
- u8 inline_on;
-};
-
struct mlx5e_xdpsq;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
- struct mlx5e_xdp_xmit_data *,
+ struct mlx5e_xmit_data *,
struct mlx5e_xdp_info *,
int);
@@ -437,12 +451,12 @@ struct mlx5e_xdpsq {
u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
u16 pc;
struct mlx5_wqe_ctrl_seg *doorbell_cseg;
- struct mlx5e_xdp_mpwqe mpwqe;
+ struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
- struct xdp_umem *umem;
+ struct xsk_buff_pool *xsk_pool;
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
@@ -600,13 +614,13 @@ struct mlx5e_rq {
struct dim dim; /* Dynamic Interrupt Moderation */
/* XDP */
- struct bpf_prog *xdp_prog;
+ struct bpf_prog __rcu *xdp_prog;
struct mlx5e_xdpsq *xdpsq;
DECLARE_BITMAP(flags, 8);
struct page_pool *page_pool;
/* AF_XDP zero-copy */
- struct xdp_umem *umem;
+ struct xsk_buff_pool *xsk_pool;
struct work_struct recover_work;
@@ -617,6 +631,7 @@ struct mlx5e_rq {
u32 rqn;
struct mlx5_core_dev *mdev;
struct mlx5_core_mkey umr_mkey;
+ struct mlx5e_dma_info wqe_overflow;
/* XDP read-mostly */
struct xdp_rxq_info xdp_rxq;
@@ -729,12 +744,13 @@ struct mlx5e_hv_vhca_stats_agent {
#endif
struct mlx5e_xsk {
- /* UMEMs are stored separately from channels, because we don't want to
- * lose them when channels are recreated. The kernel also stores UMEMs,
- * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs,
- * so rely on our mechanism.
+ /* XSK buffer pools are stored separately from channels,
+ * because we don't want to lose them when channels are
+ * recreated. The kernel also stores buffer pool, but it doesn't
+ * distinguish between zero-copy and non-zero-copy UMEMs, so
+ * rely on our mechanism.
*/
- struct xdp_umem **umems;
+ struct xsk_buff_pool **pools;
u16 refcnt;
bool ever_used;
};
@@ -893,7 +909,7 @@ struct mlx5e_xsk_param;
struct mlx5e_rq_param;
int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem, struct mlx5e_rq *rq);
+ struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq);
int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_close_rq(struct mlx5e_rq *rq);
@@ -903,7 +919,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
void mlx5e_close_icosq(struct mlx5e_icosq *sq);
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect);
void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
@@ -1005,7 +1021,6 @@ int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
-void mlx5e_update_ndo_stats(struct mlx5e_priv *priv);
void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
int mlx5e_bits_invert(unsigned long a, int size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 6fdcd5e69476..dc744702aee4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -12,9 +12,12 @@ enum {
};
struct mlx5e_tc_table {
- /* protects flow table */
+ /* Protects the dynamic assignment of the t parameter
+ * which is the nic tc root table.
+ */
struct mutex t_lock;
struct mlx5_flow_table *t;
+ struct mlx5_fs_chains *chains;
struct rhashtable ht;
@@ -24,6 +27,8 @@ struct mlx5e_tc_table {
struct notifier_block netdevice_nb;
struct netdev_net_notifier netdevice_nn;
+
+ struct mlx5_tc_ct_priv *ct;
};
struct mlx5e_flow_table {
@@ -231,6 +236,7 @@ struct mlx5e_accel_fs_tcp;
struct mlx5e_flow_steering {
struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_namespace *egress_ns;
#ifdef CONFIG_MLX5_EN_RXNFC
struct mlx5e_ethtool_steering ethtool;
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 3dc200bcfabd..69a05da0e3e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -242,8 +242,8 @@ static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
{
u32 data_size;
+ int err = 0;
u32 offset;
- int err;
for (offset = 0; offset < value_len; offset += data_size) {
data_size = value_len - offset;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
index 8fe8b4d6ad1c..254c84739046 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -51,7 +51,7 @@ static void mlx5e_monitor_counters_work(struct work_struct *work)
monitor_counters_work);
mutex_lock(&priv->state_lock);
- mlx5e_update_ndo_stats(priv);
+ mlx5e_stats_update_ndo_stats(priv);
mutex_unlock(&priv->state_lock);
mlx5e_monitor_counter_arm(priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 5de1cb9f5330..308fd279669e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -490,11 +490,8 @@ bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy)
int err;
int i;
- if (!MLX5_CAP_GEN(dev, pcam_reg))
- return -EOPNOTSUPP;
-
- if (!MLX5_CAP_PCAM_REG(dev, pplm))
- return -EOPNOTSUPP;
+ if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm))
+ return false;
MLX5_SET(pplm_reg, in, local_port, 1);
err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
@@ -572,6 +569,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane)
return -EOPNOTSUPP;
+ if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy))
+ return -EOPNOTSUPP;
+
MLX5_SET(pplm_reg, in, local_port, 1);
err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
index 906292035088..58e27038c947 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -110,11 +110,25 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
rtnl_unlock();
}
+struct neigh_update_work {
+ struct work_struct work;
+ struct neighbour *n;
+ struct mlx5e_neigh_hash_entry *nhe;
+};
+
+static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
+{
+ neigh_release(update_work->n);
+ mlx5e_rep_neigh_entry_release(update_work->nhe);
+ kfree(update_work);
+}
+
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
- struct mlx5e_neigh_hash_entry *nhe =
- container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
- struct neighbour *n = nhe->n;
+ struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
+ work);
+ struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+ struct neighbour *n = update_work->n;
struct mlx5e_encap_entry *e;
unsigned char ha[ETH_ALEN];
struct mlx5e_priv *priv;
@@ -146,30 +160,42 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e);
}
- mlx5e_rep_neigh_entry_release(nhe);
rtnl_unlock();
- neigh_release(n);
+ mlx5e_release_neigh_update_work(update_work);
}
-static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
- struct mlx5e_neigh_hash_entry *nhe,
- struct neighbour *n)
+static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
+ struct neighbour *n)
{
- /* Take a reference to ensure the neighbour and mlx5 encap
- * entry won't be destructed until we drop the reference in
- * delayed work.
- */
- neigh_hold(n);
+ struct neigh_update_work *update_work;
+ struct mlx5e_neigh_hash_entry *nhe;
+ struct mlx5e_neigh m_neigh = {};
- /* This assignment is valid as long as the the neigh reference
- * is taken
- */
- nhe->n = n;
+ update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
+ if (WARN_ON(!update_work))
+ return NULL;
- if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
- mlx5e_rep_neigh_entry_release(nhe);
- neigh_release(n);
+ m_neigh.dev = n->dev;
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ /* Obtain reference to nhe as last step in order not to release it in
+ * atomic context.
+ */
+ rcu_read_lock();
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ rcu_read_unlock();
+ if (!nhe) {
+ kfree(update_work);
+ return NULL;
}
+
+ INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
+ neigh_hold(n);
+ update_work->n = n;
+ update_work->nhe = nhe;
+
+ return update_work;
}
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
@@ -181,7 +207,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;
- struct mlx5e_neigh m_neigh = {};
+ struct neigh_update_work *update_work;
struct neigh_parms *p;
struct neighbour *n;
bool found = false;
@@ -196,17 +222,11 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
#endif
return NOTIFY_DONE;
- m_neigh.dev = n->dev;
- m_neigh.family = n->ops->family;
- memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-
- rcu_read_lock();
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
- rcu_read_unlock();
- if (!nhe)
+ update_work = mlx5e_alloc_neigh_update_work(priv, n);
+ if (!update_work)
return NOTIFY_DONE;
- mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
+ queue_work(priv->wq, &update_work->work);
break;
case NETEVENT_DELAY_PROBE_TIME_UPDATE:
@@ -352,7 +372,6 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
(*nhe)->priv = priv;
memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
- INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 79cc42d88eec..e36e505d38ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -12,7 +12,7 @@
#include "neigh.h"
#include "en_rep.h"
#include "eswitch.h"
-#include "esw/chains.h"
+#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mapping.h"
#include "en/tc_tun.h"
@@ -191,7 +191,7 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
case TC_SETUP_CLSFLOWER:
memcpy(&tmp, f, sizeof(*f));
- if (!mlx5_esw_chains_prios_supported(esw))
+ if (!mlx5_chains_prios_supported(esw_chains(esw)))
return -EOPNOTSUPP;
/* Re-use tc offload path by moving the ft flow to the
@@ -203,12 +203,12 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
*
* We only support chain 0 of FT offload.
*/
- if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
+ if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
return -EOPNOTSUPP;
if (tmp.common.chain_index != 0)
return -EOPNOTSUPP;
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
tmp.common.prio++;
err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
@@ -378,12 +378,12 @@ static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
*
* We only support chain 0 of FT offload.
*/
- if (!mlx5_esw_chains_prios_supported(esw) ||
- tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) ||
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
+ tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
tmp.common.chain_index)
return -EOPNOTSUPP;
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
tmp.common.prio++;
err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
@@ -612,7 +612,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
struct tc_skb_ext *tc_skb_ext;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
- int tunnel_moffset;
int err;
reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
@@ -626,7 +625,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
priv = netdev_priv(skb->dev);
esw = priv->mdev->priv.eswitch;
- err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
+ err = mlx5_get_chain_for_tag(esw_chains(esw), reg_c0, &chain);
if (err) {
netdev_dbg(priv->netdev,
"Couldn't find chain for chain tag: %d, err: %d\n",
@@ -647,13 +646,12 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
- if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb,
+ if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
zone_restore_id))
return false;
}
- tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
- tunnel_id = reg_c1 >> (8 * tunnel_moffset);
+ tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG);
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
#endif /* CONFIG_NET_TC_SKB_EXT */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index c6bc9224c3b1..e521254d886e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -14,7 +14,7 @@
#include <linux/workqueue.h>
#include <linux/xarray.h>
-#include "esw/chains.h"
+#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
@@ -39,8 +39,9 @@
netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
struct mlx5_tc_ct_priv {
- struct mlx5_eswitch *esw;
+ struct mlx5_core_dev *dev;
const struct net_device *netdev;
+ struct mod_hdr_tbl *mod_hdr_tbl;
struct idr fte_ids;
struct xarray tuple_ids;
struct rhashtable zone_ht;
@@ -50,13 +51,16 @@ struct mlx5_tc_ct_priv {
struct mlx5_flow_table *ct_nat;
struct mlx5_flow_table *post_ct;
struct mutex control_lock; /* guards parallel adds/dels */
+ struct mutex shared_counter_lock;
struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
};
struct mlx5_ct_flow {
- struct mlx5_esw_flow_attr pre_ct_attr;
- struct mlx5_esw_flow_attr post_ct_attr;
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_flow_attr *post_ct_attr;
struct mlx5_flow_handle *pre_ct_rule;
struct mlx5_flow_handle *post_ct_rule;
struct mlx5_ct_ft *ft;
@@ -67,12 +71,12 @@ struct mlx5_ct_flow {
struct mlx5_ct_zone_rule {
struct mlx5_flow_handle *rule;
struct mlx5e_mod_hdr_handle *mh;
- struct mlx5_esw_flow_attr attr;
+ struct mlx5_flow_attr *attr;
bool nat;
};
struct mlx5_tc_ct_pre {
- struct mlx5_flow_table *fdb;
+ struct mlx5_flow_table *ft;
struct mlx5_flow_group *flow_grp;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *flow_rule;
@@ -114,11 +118,16 @@ struct mlx5_ct_tuple {
u16 zone;
};
+struct mlx5_ct_shared_counter {
+ struct mlx5_fc *counter;
+ refcount_t refcount;
+};
+
struct mlx5_ct_entry {
struct rhash_head node;
struct rhash_head tuple_node;
struct rhash_head tuple_nat_node;
- struct mlx5_fc *counter;
+ struct mlx5_ct_shared_counter *shared_counter;
unsigned long cookie;
unsigned long restore_cookie;
struct mlx5_ct_tuple tuple;
@@ -157,18 +166,6 @@ static const struct rhashtable_params tuples_nat_ht_params = {
.min_size = 16 * 1024,
};
-static struct mlx5_tc_ct_priv *
-mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_rep_uplink_priv *uplink_priv;
- struct mlx5e_rep_priv *uplink_rpriv;
-
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- uplink_priv = &uplink_rpriv->uplink_priv;
- return uplink_priv->ct_priv;
-}
-
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
@@ -246,8 +243,10 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
ip6_offset /= 4;
- if (ip6_offset < 8)
+ if (ip6_offset < 4)
tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
+ else if (ip6_offset < 8)
+ tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
else
return -EOPNOTSUPP;
break;
@@ -395,20 +394,30 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
}
static void
+mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->shared_counter->refcount))
+ return;
+
+ mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter);
+ kfree(entry->shared_counter);
+}
+
+static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_entry *entry,
bool nat)
{
struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
- struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
- struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5_flow_attr *attr = zone_rule->attr;
ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
- mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr);
- mlx5e_mod_hdr_detach(ct_priv->esw->dev,
- &esw->offloads.mod_hdr, zone_rule->mh);
+ mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
+ mlx5e_mod_hdr_detach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl, zone_rule->mh);
mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
+ kfree(attr);
}
static void
@@ -417,8 +426,6 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
{
mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
-
- mlx5_fc_destroy(ct_priv->esw->dev, entry->counter);
}
static struct flow_action_entry *
@@ -444,29 +451,40 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
u32 labels_id,
u8 zone_restore_id)
{
- struct mlx5_eswitch *esw = ct_priv->esw;
+ enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
+ struct mlx5_core_dev *dev = ct_priv->dev;
int err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
CTSTATE_TO_REG, ct_state);
if (err)
return err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
MARK_TO_REG, mark);
if (err)
return err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
LABELS_TO_REG, labels_id);
if (err)
return err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
ZONE_RESTORE_TO_REG, zone_restore_id);
if (err)
return err;
+ /* Make another copy of zone id in reg_b for
+ * NIC rx flows since we don't copy reg_c1 to
+ * reg_b upon miss.
+ */
+ if (ns != MLX5_FLOW_NAMESPACE_FDB) {
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+ }
return 0;
}
@@ -547,7 +565,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
struct flow_action *flow_action = &flow_rule->action;
- struct mlx5_core_dev *mdev = ct_priv->esw->dev;
+ struct mlx5_core_dev *mdev = ct_priv->dev;
struct flow_action_entry *act;
size_t action_size;
char *modact;
@@ -558,8 +576,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_MANGLE: {
- err = alloc_mod_hdr_actions(mdev,
- MLX5_FLOW_NAMESPACE_FDB,
+ err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
mod_acts);
if (err)
return err;
@@ -588,7 +605,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct flow_rule *flow_rule,
struct mlx5e_mod_hdr_handle **mh,
u8 zone_restore_id, bool nat)
@@ -624,9 +641,9 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
if (err)
goto err_mapping;
- *mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev,
- &ct_priv->esw->offloads.mod_hdr,
- MLX5_FLOW_NAMESPACE_FDB,
+ *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl,
+ ct_priv->ns_type,
&mod_acts);
if (IS_ERR(*mh)) {
err = PTR_ERR(*mh);
@@ -650,9 +667,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
bool nat, u8 zone_restore_id)
{
struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
- struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
- struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_flow_attr *attr;
int err;
zone_rule->nat = nat;
@@ -661,6 +678,12 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
if (!spec)
return -ENOMEM;
+ attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+
err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
&zone_rule->mh,
zone_restore_id, nat);
@@ -674,9 +697,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
MLX5_FLOW_CONTEXT_ACTION_COUNT;
attr->dest_chain = 0;
attr->dest_ft = ct_priv->post_ct;
- attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct;
+ attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
attr->outer_match_level = MLX5_MATCH_L4;
- attr->counter = entry->counter;
+ attr->counter = entry->shared_counter->counter;
attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
@@ -684,38 +707,100 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
entry->tuple.zone & MLX5_CT_ZONE_MASK,
MLX5_CT_ZONE_MASK);
- zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+ zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
if (IS_ERR(zone_rule->rule)) {
err = PTR_ERR(zone_rule->rule);
ct_dbg("Failed to add ct entry rule, nat: %d", nat);
goto err_rule;
}
+ zone_rule->attr = attr;
+
kfree(spec);
ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
return 0;
err_rule:
- mlx5e_mod_hdr_detach(ct_priv->esw->dev,
- &esw->offloads.mod_hdr, zone_rule->mh);
+ mlx5e_mod_hdr_detach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl, zone_rule->mh);
+ mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
+ kfree(attr);
+err_attr:
kfree(spec);
return err;
}
+static struct mlx5_ct_shared_counter *
+mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ struct mlx5_ct_tuple rev_tuple = entry->tuple;
+ struct mlx5_ct_shared_counter *shared_counter;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_ct_entry *rev_entry;
+ __be16 tmp_port;
+ int ret;
+
+ /* get the reversed tuple */
+ tmp_port = rev_tuple.port.src;
+ rev_tuple.port.src = rev_tuple.port.dst;
+ rev_tuple.port.dst = tmp_port;
+
+ if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ __be32 tmp_addr = rev_tuple.ip.src_v4;
+
+ rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
+ rev_tuple.ip.dst_v4 = tmp_addr;
+ } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
+
+ rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
+ rev_tuple.ip.dst_v6 = tmp_addr;
+ } else {
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Use the same counter as the reverse direction */
+ mutex_lock(&ct_priv->shared_counter_lock);
+ rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
+ tuples_ht_params);
+ if (rev_entry) {
+ if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) {
+ mutex_unlock(&ct_priv->shared_counter_lock);
+ return rev_entry->shared_counter;
+ }
+ }
+ mutex_unlock(&ct_priv->shared_counter_lock);
+
+ shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL);
+ if (!shared_counter)
+ return ERR_PTR(-ENOMEM);
+
+ shared_counter->counter = mlx5_fc_create(dev, true);
+ if (IS_ERR(shared_counter->counter)) {
+ ct_dbg("Failed to create counter for ct entry");
+ ret = PTR_ERR(shared_counter->counter);
+ kfree(shared_counter);
+ return ERR_PTR(ret);
+ }
+
+ refcount_set(&shared_counter->refcount, 1);
+ return shared_counter;
+}
+
static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
struct flow_rule *flow_rule,
struct mlx5_ct_entry *entry,
u8 zone_restore_id)
{
- struct mlx5_eswitch *esw = ct_priv->esw;
int err;
- entry->counter = mlx5_fc_create(esw->dev, true);
- if (IS_ERR(entry->counter)) {
- err = PTR_ERR(entry->counter);
+ entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+ if (IS_ERR(entry->shared_counter)) {
+ err = PTR_ERR(entry->shared_counter);
ct_dbg("Failed to create counter for ct entry");
return err;
}
@@ -735,7 +820,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
err_nat:
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
- mlx5_fc_destroy(esw->dev, entry->counter);
+ mlx5_tc_ct_shared_counter_put(ct_priv, entry);
return err;
}
@@ -825,12 +910,16 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_entry *entry)
{
mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+ mutex_lock(&ct_priv->shared_counter_lock);
if (entry->tuple_node.next)
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node,
tuples_nat_ht_params);
rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
tuples_ht_params);
+ mutex_unlock(&ct_priv->shared_counter_lock);
+ mlx5_tc_ct_shared_counter_put(ct_priv, entry);
+
}
static int
@@ -867,7 +956,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
if (!entry)
return -ENOENT;
- mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse);
+ mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse);
flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
FLOW_ACTION_HW_STATS_DELAYED);
@@ -940,9 +1029,7 @@ out:
return false;
}
-int
-mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec)
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
u32 ctstate = 0, ctstate_mask = 0;
@@ -958,14 +1045,21 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
return 0;
}
+void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
+{
+ if (!priv || !ct_attr->ct_labels_id)
+ return;
+
+ mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
+}
+
int
-mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct flow_cls_offload *f,
- struct mlx5_ct_attr *ct_attr,
- struct netlink_ext_ack *extack)
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector_key_ct *mask, *key;
bool trk, est, untrk, unest, new;
@@ -978,7 +1072,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
return 0;
- if (!ct_priv) {
+ if (!priv) {
NL_SET_ERR_MSG_MOD(extack,
"offload of ct matching isn't available");
return -EOPNOTSUPP;
@@ -1034,7 +1128,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
- if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
+ if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
return -EOPNOTSUPP;
mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
MLX5_CT_LABELS_MASK);
@@ -1044,14 +1138,12 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
}
int
-mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
-
- if (!ct_priv) {
+ if (!priv) {
NL_SET_ERR_MSG_MOD(extack,
"offload of ct action isn't available");
return -EOPNOTSUPP;
@@ -1070,8 +1162,8 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
{
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
- struct mlx5_core_dev *dev = ct_priv->esw->dev;
- struct mlx5_flow_table *fdb = pre_ct->fdb;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table *ft = pre_ct->ft;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_modify_hdr *mod_hdr;
@@ -1086,14 +1178,14 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
return -ENOMEM;
zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
- err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
+ err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
+ ZONE_TO_REG, zone);
if (err) {
ct_dbg("Failed to set zone register mapping");
goto err_mapping;
}
- mod_hdr = mlx5_modify_header_alloc(dev,
- MLX5_FLOW_NAMESPACE_FDB,
+ mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
pre_mod_acts.num_actions,
pre_mod_acts.actions);
@@ -1119,7 +1211,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
dest.ft = ct_priv->post_ct;
- rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add pre ct flow rule zone %d", zone);
@@ -1130,7 +1222,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
/* add miss rule */
memset(spec, 0, sizeof(*spec));
dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
- rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add pre ct miss rule zone %d", zone);
@@ -1157,7 +1249,7 @@ tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
struct mlx5_tc_ct_pre *pre_ct)
{
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
- struct mlx5_core_dev *dev = ct_priv->esw->dev;
+ struct mlx5_core_dev *dev = ct_priv->dev;
mlx5_del_flow_rules(pre_ct->flow_rule);
mlx5_del_flow_rules(pre_ct->miss_rule);
@@ -1171,7 +1263,7 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
- struct mlx5_core_dev *dev = ct_priv->esw->dev;
+ struct mlx5_core_dev *dev = ct_priv->dev;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *ft;
@@ -1181,10 +1273,10 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
void *misc;
int err;
- ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
if (!ns) {
err = -EOPNOTSUPP;
- ct_dbg("Failed to get FDB flow namespace");
+ ct_dbg("Failed to get flow namespace");
return err;
}
@@ -1193,7 +1285,8 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
return -ENOMEM;
ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
- ft_attr.prio = FDB_TC_OFFLOAD;
+ ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
ft_attr.max_fte = 2;
ft_attr.level = 1;
ft = mlx5_create_flow_table(ns, &ft_attr);
@@ -1202,7 +1295,7 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
ct_dbg("Failed to create pre ct table");
goto out_free;
}
- pre_ct->fdb = ft;
+ pre_ct->ft = ft;
/* create flow group */
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
@@ -1266,7 +1359,7 @@ mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
mlx5_destroy_flow_group(pre_ct->miss_grp);
mlx5_destroy_flow_group(pre_ct->flow_grp);
- mlx5_destroy_flow_table(pre_ct->fdb);
+ mlx5_destroy_flow_table(pre_ct->ft);
}
static int
@@ -1385,7 +1478,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
/* We translate the tc filter with CT action to the following HW model:
*
* +---------------------+
- * + fdb prio (tc chain) +
+ * + ft prio (tc chain) +
* + original match +
* +---------------------+
* | set chain miss mapping
@@ -1415,17 +1508,17 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* +--------------+
*/
static struct mlx5_flow_handle *
-__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *orig_spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
struct mlx5_flow_spec *post_ct_spec = NULL;
- struct mlx5_eswitch *esw = ct_priv->esw;
- struct mlx5_esw_flow_attr *pre_ct_attr;
+ struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
struct mlx5_flow_handle *rule;
struct mlx5_ct_flow *ct_flow;
@@ -1460,10 +1553,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
}
ct_flow->fte_id = fte_id;
- /* Base esw attributes of both rules on original rule attribute */
- pre_ct_attr = &ct_flow->pre_ct_attr;
- memcpy(pre_ct_attr, attr, sizeof(*attr));
- memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr));
+ /* Base flow attributes of both rules on original rule attribute */
+ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->pre_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre;
+ }
+
+ ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->post_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_post;
+ }
+
+ pre_ct_attr = ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, attr_sz);
+ memcpy(ct_flow->post_ct_attr, attr, attr_sz);
/* Modify the original rule's action to fwd and modify, leave decap */
pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
@@ -1474,22 +1579,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
* don't go though all prios of this chain as normal tc rules
* miss.
*/
- err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain,
- &chain_mapping);
+ err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
+ &chain_mapping);
if (err) {
ct_dbg("Failed to get chain register mapping for chain");
goto err_get_chain;
}
ct_flow->chain_mapping = chain_mapping;
- err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
CHAIN_TO_REG, chain_mapping);
if (err) {
ct_dbg("Failed to set chain register mapping");
goto err_mapping;
}
- err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
FTEID_TO_REG, fte_id);
if (err) {
ct_dbg("Failed to set fte_id register mapping");
@@ -1503,7 +1608,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
attr->chain == 0) {
u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
- err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
+ ct_priv->ns_type,
TUNNEL_TO_REG,
tun_id);
if (err) {
@@ -1512,8 +1618,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
}
}
- mod_hdr = mlx5_modify_header_alloc(esw->dev,
- MLX5_FLOW_NAMESPACE_FDB,
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
pre_mod_acts.num_actions,
pre_mod_acts.actions);
if (IS_ERR(mod_hdr)) {
@@ -1529,16 +1634,16 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
fte_id, MLX5_FTE_ID_MASK);
- /* Put post_ct rule on post_ct fdb */
- ct_flow->post_ct_attr.chain = 0;
- ct_flow->post_ct_attr.prio = 0;
- ct_flow->post_ct_attr.fdb = ct_priv->post_ct;
+ /* Put post_ct rule on post_ct flow table */
+ ct_flow->post_ct_attr->chain = 0;
+ ct_flow->post_ct_attr->prio = 0;
+ ct_flow->post_ct_attr->ft = ct_priv->post_ct;
- ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE;
- ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE;
- ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
- rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec,
- &ct_flow->post_ct_attr);
+ ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
+ ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
+ ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+ rule = mlx5_tc_rule_insert(priv, post_ct_spec,
+ ct_flow->post_ct_attr);
ct_flow->post_ct_rule = rule;
if (IS_ERR(ct_flow->post_ct_rule)) {
err = PTR_ERR(ct_flow->post_ct_rule);
@@ -1548,10 +1653,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
/* Change original rule point to ct table */
pre_ct_attr->dest_chain = 0;
- pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
- ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
- orig_spec,
- pre_ct_attr);
+ pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
+ ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
+ pre_ct_attr);
if (IS_ERR(ct_flow->pre_ct_rule)) {
err = PTR_ERR(ct_flow->pre_ct_rule);
ct_dbg("Failed to add pre ct rule");
@@ -1565,14 +1669,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
return rule;
err_insert_orig:
- mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule,
- &ct_flow->post_ct_attr);
+ mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
+ ct_flow->post_ct_attr);
err_insert_post_ct:
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
dealloc_mod_hdr_actions(&pre_mod_acts);
- mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
+ kfree(ct_flow->post_ct_attr);
+err_alloc_post:
+ kfree(ct_flow->pre_ct_attr);
+err_alloc_pre:
idr_remove(&ct_priv->fte_ids, fte_id);
err_idr:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
@@ -1584,14 +1692,14 @@ err_ft:
}
static struct mlx5_flow_handle *
-__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
+__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_flow_spec *orig_spec,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
- struct mlx5_eswitch *esw = ct_priv->esw;
- struct mlx5_esw_flow_attr *pre_ct_attr;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
+ u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
+ struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
struct mlx5_flow_handle *rule;
struct mlx5_ct_flow *ct_flow;
@@ -1602,8 +1710,13 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
return ERR_PTR(-ENOMEM);
/* Base esw attributes on original rule attribute */
- pre_ct_attr = &ct_flow->pre_ct_attr;
- memcpy(pre_ct_attr, attr, sizeof(*attr));
+ pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!pre_ct_attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+
+ memcpy(pre_ct_attr, attr, attr_sz);
err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
if (err) {
@@ -1611,8 +1724,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
goto err_set_registers;
}
- mod_hdr = mlx5_modify_header_alloc(esw->dev,
- MLX5_FLOW_NAMESPACE_FDB,
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
mod_acts->num_actions,
mod_acts->actions);
if (IS_ERR(mod_hdr)) {
@@ -1625,7 +1737,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
pre_ct_attr->modify_hdr = mod_hdr;
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr);
+ rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add ct clear rule");
@@ -1633,6 +1745,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
}
attr->ct_attr.ct_flow = ct_flow;
+ ct_flow->pre_ct_attr = pre_ct_attr;
ct_flow->pre_ct_rule = rule;
return rule;
@@ -1641,61 +1754,67 @@ err_insert:
err_set_registers:
netdev_warn(priv->netdev,
"Failed to offload ct clear flow, err %d\n", err);
+ kfree(pre_ct_attr);
+err_attr:
+ kfree(ct_flow);
+
return ERR_PTR(err);
}
struct mlx5_flow_handle *
-mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
struct mlx5_flow_handle *rule;
- if (!ct_priv)
+ if (!priv)
return ERR_PTR(-EOPNOTSUPP);
- mutex_lock(&ct_priv->control_lock);
+ mutex_lock(&priv->control_lock);
if (clear_action)
rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
else
rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
- mutex_unlock(&ct_priv->control_lock);
+ mutex_unlock(&priv->control_lock);
return rule;
}
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5e_tc_flow *flow,
struct mlx5_ct_flow *ct_flow)
{
- struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr;
- struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule,
- pre_ct_attr);
- mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr);
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
+ pre_ct_attr);
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
if (ct_flow->post_ct_rule) {
- mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule,
- &ct_flow->post_ct_attr);
- mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
+ mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
+ ct_flow->post_ct_attr);
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
}
+ kfree(ct_flow->pre_ct_attr);
+ kfree(ct_flow->post_ct_attr);
kfree(ct_flow);
}
void
-mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow,
- struct mlx5_esw_flow_attr *attr)
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
/* We are called on error to clean up stuff from parsing
@@ -1704,22 +1823,15 @@ mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow,
if (!ct_flow)
return;
- mutex_lock(&ct_priv->control_lock);
- __mlx5_tc_ct_delete_flow(ct_priv, ct_flow);
- mutex_unlock(&ct_priv->control_lock);
+ mutex_lock(&priv->control_lock);
+ __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
+ mutex_unlock(&priv->control_lock);
}
static int
-mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw,
- const char **err_msg)
+mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
+ const char **err_msg)
{
-#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- /* cannot restore chain ID on HW miss */
-
- *err_msg = "tc skb extension missing";
- return -EOPNOTSUPP;
-#endif
-
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
*err_msg = "firmware level support is missing";
return -EOPNOTSUPP;
@@ -1753,44 +1865,61 @@ mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw,
return 0;
}
-static void
-mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err)
+static int
+mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
+ const char **err_msg)
+{
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+ *err_msg = "firmware level support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ const char **err_msg)
{
- if (msg)
- netdev_warn(rpriv->netdev,
- "tc ct offload not supported, %s, err: %d\n",
- msg, err);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ /* cannot restore chain ID on HW miss */
+
+ *err_msg = "tc skb extension missing";
+ return -EOPNOTSUPP;
+#endif
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
else
- netdev_warn(rpriv->netdev,
- "tc ct offload not supported, err: %d\n",
- err);
+ return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
}
-int
-mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
+#define INIT_ERR_PREFIX "tc ct offload init failed"
+
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type)
{
struct mlx5_tc_ct_priv *ct_priv;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5_eswitch *esw;
- struct mlx5e_priv *priv;
+ struct mlx5_core_dev *dev;
const char *msg;
int err;
- rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
- priv = netdev_priv(rpriv->netdev);
- esw = priv->mdev->priv.eswitch;
-
- err = mlx5_tc_ct_init_check_support(esw, &msg);
+ dev = priv->mdev;
+ err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
if (err) {
- mlx5_tc_ct_init_err(rpriv, msg, err);
+ mlx5_core_warn(dev,
+ "tc ct offload not supported, %s\n",
+ msg);
goto err_support;
}
ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
- if (!ct_priv) {
- mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM);
+ if (!ct_priv)
goto err_alloc;
- }
ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
if (IS_ERR(ct_priv->zone_mapping)) {
@@ -1804,46 +1933,51 @@ mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
goto err_mapping_labels;
}
- ct_priv->esw = esw;
- ct_priv->netdev = rpriv->netdev;
- ct_priv->ct = mlx5_esw_chains_create_global_table(esw);
+ ct_priv->ns_type = ns_type;
+ ct_priv->chains = chains;
+ ct_priv->netdev = priv->netdev;
+ ct_priv->dev = priv->mdev;
+ ct_priv->mod_hdr_tbl = mod_hdr;
+ ct_priv->ct = mlx5_chains_create_global_table(chains);
if (IS_ERR(ct_priv->ct)) {
err = PTR_ERR(ct_priv->ct);
- mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct table err: %d\n",
+ INIT_ERR_PREFIX, err);
goto err_ct_tbl;
}
- ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw);
+ ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
if (IS_ERR(ct_priv->ct_nat)) {
err = PTR_ERR(ct_priv->ct_nat);
- mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table",
- err);
+ mlx5_core_warn(dev,
+ "%s, failed to create ct nat table err: %d\n",
+ INIT_ERR_PREFIX, err);
goto err_ct_nat_tbl;
}
- ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw);
+ ct_priv->post_ct = mlx5_chains_create_global_table(chains);
if (IS_ERR(ct_priv->post_ct)) {
err = PTR_ERR(ct_priv->post_ct);
- mlx5_tc_ct_init_err(rpriv, "failed to create post ct table",
- err);
+ mlx5_core_warn(dev,
+ "%s, failed to create post ct table err: %d\n",
+ INIT_ERR_PREFIX, err);
goto err_post_ct_tbl;
}
idr_init(&ct_priv->fte_ids);
mutex_init(&ct_priv->control_lock);
+ mutex_init(&ct_priv->shared_counter_lock);
rhashtable_init(&ct_priv->zone_ht, &zone_params);
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
- /* Done, set ct_priv to know it initializted */
- uplink_priv->ct_priv = ct_priv;
-
- return 0;
+ return ct_priv;
err_post_ct_tbl:
- mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
- mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
@@ -1853,20 +1987,22 @@ err_mapping_zone:
err_alloc:
err_support:
- return 0;
+ return NULL;
}
void
-mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
- struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
+ struct mlx5_fs_chains *chains;
if (!ct_priv)
return;
- mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct);
- mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat);
- mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct);
+ chains = ct_priv->chains;
+
+ mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
+ mlx5_chains_destroy_global_table(chains, ct_priv->ct);
mapping_destroy(ct_priv->zone_mapping);
mapping_destroy(ct_priv->labels_mapping);
@@ -1874,17 +2010,15 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
rhashtable_destroy(&ct_priv->zone_ht);
mutex_destroy(&ct_priv->control_lock);
+ mutex_destroy(&ct_priv->shared_counter_lock);
idr_destroy(&ct_priv->fte_ids);
kfree(ct_priv);
-
- uplink_priv->ct_priv = NULL;
}
bool
-mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id)
{
- struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
struct mlx5_ct_tuple tuple = {};
struct mlx5_ct_entry *entry;
u16 zone;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 3baef917a677..6503b614337c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -10,12 +10,14 @@
#include "en.h"
-struct mlx5_esw_flow_attr;
+struct mlx5_flow_attr;
struct mlx5e_tc_mod_hdr_acts;
struct mlx5_rep_uplink_priv;
struct mlx5e_tc_flow;
struct mlx5e_priv;
+struct mlx5_fs_chains;
+struct mlx5_tc_ct_priv;
struct mlx5_ct_flow;
struct nf_flowtable;
@@ -76,66 +78,82 @@ struct mlx5_ct_attr {
misc_parameters_2.metadata_reg_c_1) + 3,\
}
+#define nic_zone_restore_to_reg_ct {\
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\
+ .moffset = 2,\
+ .mlen = 1,\
+}
+
#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen)
+#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset)
+#define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8)
#define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8)
#define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0)
#if IS_ENABLED(CONFIG_MLX5_TC_CT)
-int
-mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv);
+struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type);
void
-mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv);
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv);
+
+void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr);
int
-mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct flow_cls_offload *f,
- struct mlx5_ct_attr *ct_attr,
- struct netlink_ext_ack *extack);
-int
-mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec);
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack);
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
int
-mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack);
struct mlx5_flow_handle *
-mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
void
-mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv,
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
struct mlx5e_tc_flow *flow,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
bool
-mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id);
#else /* CONFIG_MLX5_TC_CT */
-static inline int
-mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
+static inline struct mlx5_tc_ct_priv *
+mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ struct mod_hdr_tbl *mod_hdr,
+ enum mlx5_flow_namespace_type ns_type)
{
- return 0;
+ return NULL;
}
static inline void
-mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
+mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
}
+static inline void
+mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {}
+
static inline int
-mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct flow_cls_offload *f,
- struct mlx5_ct_attr *ct_attr,
- struct netlink_ext_ack *extack)
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct mlx5_ct_attr *ct_attr,
+ struct netlink_ext_ack *extack)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
@@ -143,47 +161,44 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
return 0;
NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
- netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n");
return -EOPNOTSUPP;
}
static inline int
-mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec)
+mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
return 0;
}
static inline int
-mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
- netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n");
return -EOPNOTSUPP;
}
static inline struct mlx5_flow_handle *
-mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline void
-mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv,
+mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
struct mlx5e_tc_flow *flow,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
}
static inline bool
-mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
+mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id)
{
if (!zone_restore_id)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 9334c9c3e208..07ee1d236ab3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -7,6 +7,21 @@
#include "en.h"
#include <linux/indirect_call_wrapper.h>
+#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+
+/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
+ * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
enum mlx5e_icosq_wqe_type {
@@ -20,6 +35,11 @@ enum mlx5e_icosq_wqe_type {
};
/* General */
+static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb)
+{
+ return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST;
+}
+
void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
@@ -41,8 +61,6 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
-void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
@@ -105,6 +123,7 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes;
u8 num_wqebbs;
u8 num_dma;
+ u8 num_fifo_pkts;
#ifdef CONFIG_MLX5_EN_TLS
struct page *resync_dump_frag_page;
#endif
@@ -189,23 +208,6 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
}
static inline void
-mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
-{
- struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
-
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- memset(wi, 0, sizeof(*wi));
- wi->num_wqebbs = 1;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
- sq->stats->nop += nnops;
-}
-
-static inline void
mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
struct mlx5_wqe_ctrl_seg *ctrl)
{
@@ -223,29 +225,6 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
mlx5_write64((__be32 *)ctrl, uar_map);
}
-static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg)
-{
- return cseg && !!cseg->tis_tir_num;
-}
-
-static inline u8
-mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
- struct sk_buff *skb)
-{
- u8 mode;
-
- if (mlx5e_transport_inline_tx_wqe(cseg))
- return MLX5_INLINE_MODE_TCP_UDP;
-
- mode = sq->min_inline_mode;
-
- if (skb_vlan_tag_present(skb) &&
- test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
- mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
-
- return mode;
-}
-
static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
{
struct mlx5_core_cq *mcq;
@@ -271,6 +250,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
dma->type = map_type;
}
+static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
+{
+ return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
+}
+
+static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+{
+ struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
+
+ *skb_item = skb;
+}
+
+static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
+{
+ return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
+}
+
static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{
@@ -286,6 +282,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
}
}
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
+
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
+{
+ return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
+}
+
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
{
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 0e6946fc121f..ae90d533a350 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -59,7 +59,7 @@ static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
- struct mlx5e_xdp_xmit_data xdptxd;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
@@ -122,7 +122,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
u32 *len, struct xdp_buff *xdp)
{
- struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+ struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
u32 act;
int err;
@@ -194,18 +194,22 @@ static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
+ struct mlx5e_tx_wqe *wqe;
u16 pi;
- pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
- session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ net_prefetchw(wqe->data);
- prefetchw(session->wqe->data);
- session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
- session->pkt_count = 0;
-
- mlx5e_xdp_update_inline_state(sq);
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
+ };
stats->mpwqe++;
}
@@ -213,7 +217,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
u16 ds_count = session->ds_count;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -258,10 +262,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
}
INDIRECT_CALLABLE_SCOPE bool
-mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
+mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi, int check_result)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
if (unlikely(xdptxd->len > sq->hw_mtu)) {
@@ -284,8 +288,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *x
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
- session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
+ if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
@@ -306,7 +309,7 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
}
INDIRECT_CALLABLE_SCOPE bool
-mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi, int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
@@ -322,7 +325,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats = sq->stats;
- prefetchw(wqe);
+ net_prefetchw(wqe);
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
stats->err++;
@@ -445,7 +448,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
if (xsk_frames)
- xsk_umem_complete_tx(sq->umem, xsk_frames);
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
sq->stats->cqes += i;
@@ -475,7 +478,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
}
if (xsk_frames)
- xsk_umem_complete_tx(sq->umem, xsk_frames);
+ xsk_tx_completed(sq->xsk_pool, xsk_frames);
}
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -503,7 +506,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
- struct mlx5e_xdp_xmit_data xdptxd;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
bool ret;
@@ -563,4 +566,3 @@ void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
-
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index e806c13d491f..d487e5e37162 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -38,27 +38,12 @@
#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
- (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
-#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
-
-#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
-#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
- DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
-
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
- */
-#if L1_CACHE_BYTES < 128
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */)
-#define MLX5E_XDP_MPW_MAX_NUM_DS \
- (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
+ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
+ sizeof(struct mlx5_wqe_inline_seg))
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
@@ -73,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
@@ -122,30 +107,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
/* Enable inline WQEs to shift some load from a congested HCA (HW) to
* a less congested cpu (SW).
*/
-static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
+static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
{
u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
- if (session->inline_on) {
- if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
- session->inline_on = 0;
- return;
- }
+ if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
+ return false;
+
+ if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
+ return true;
- /* inline is false */
- if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
- session->inline_on = 1;
+ return cur;
}
-static inline bool
-mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
+static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
{
- return session->inline_on &&
- session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
+ if (session->inline_on)
+ return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
+ MLX5E_TX_MPW_MAX_NUM_DS;
+ return mlx5e_tx_mpwqe_is_full(session);
}
struct mlx5e_xdp_wqe_info {
@@ -155,15 +138,16 @@ struct mlx5e_xdp_wqe_info {
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
u32 dma_len = xdptxd->len;
session->pkt_count++;
+ session->bytes_count += dma_len;
if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
struct mlx5_wqe_inline_seg *inline_dseg =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
index 331ca2b0f8a4..71e8d66fa150 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -1,31 +1,31 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2019 Mellanox Technologies. */
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
#include <net/xdp_sock_drv.h>
-#include "umem.h"
+#include "pool.h"
#include "setup.h"
#include "en/params.h"
-static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
- struct xdp_umem *umem)
+static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
{
- struct device *dev = priv->mdev->device;
+ struct device *dev = mlx5_core_dma_dev(priv->mdev);
- return xsk_buff_dma_map(umem, dev, 0);
+ return xsk_pool_dma_map(pool, dev, 0);
}
-static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
- struct xdp_umem *umem)
+static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
{
- return xsk_buff_dma_unmap(umem, 0);
+ return xsk_pool_dma_unmap(pool, 0);
}
-static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
+static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
{
- if (!xsk->umems) {
- xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS,
- sizeof(*xsk->umems), GFP_KERNEL);
- if (unlikely(!xsk->umems))
+ if (!xsk->pools) {
+ xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->pools), GFP_KERNEL);
+ if (unlikely(!xsk->pools))
return -ENOMEM;
}
@@ -35,68 +35,68 @@ static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
return 0;
}
-static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk)
+static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk)
{
if (!--xsk->refcnt) {
- kfree(xsk->umems);
- xsk->umems = NULL;
+ kfree(xsk->pools);
+ xsk->pools = NULL;
}
}
-static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix)
+static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix)
{
int err;
- err = mlx5e_xsk_get_umems(xsk);
+ err = mlx5e_xsk_get_pools(xsk);
if (unlikely(err))
return err;
- xsk->umems[ix] = umem;
+ xsk->pools[ix] = pool;
return 0;
}
-static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
+static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix)
{
- xsk->umems[ix] = NULL;
+ xsk->pools[ix] = NULL;
- mlx5e_xsk_put_umems(xsk);
+ mlx5e_xsk_put_pools(xsk);
}
-static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
+static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool)
{
- return xsk_umem_get_headroom(umem) <= 0xffff &&
- xsk_umem_get_chunk_size(umem) <= 0xffff;
+ return xsk_pool_get_headroom(pool) <= 0xffff &&
+ xsk_pool_get_chunk_size(pool) <= 0xffff;
}
-void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk)
{
- xsk->headroom = xsk_umem_get_headroom(umem);
- xsk->chunk_size = xsk_umem_get_chunk_size(umem);
+ xsk->headroom = xsk_pool_get_headroom(pool);
+ xsk->chunk_size = xsk_pool_get_chunk_size(pool);
}
static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
- struct xdp_umem *umem, u16 ix)
+ struct xsk_buff_pool *pool, u16 ix)
{
struct mlx5e_params *params = &priv->channels.params;
struct mlx5e_xsk_param xsk;
struct mlx5e_channel *c;
int err;
- if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix)))
+ if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix)))
return -EBUSY;
- if (unlikely(!mlx5e_xsk_is_umem_sane(umem)))
+ if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
return -EINVAL;
- err = mlx5e_xsk_map_umem(priv, umem);
+ err = mlx5e_xsk_map_pool(priv, pool);
if (unlikely(err))
return err;
- err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix);
+ err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix);
if (unlikely(err))
- goto err_unmap_umem;
+ goto err_unmap_pool;
- mlx5e_build_xsk_param(umem, &xsk);
+ mlx5e_build_xsk_param(pool, &xsk);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
/* XSK objects will be created on open. */
@@ -112,9 +112,9 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
c = priv->channels.c[ix];
- err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ err = mlx5e_open_xsk(priv, params, &xsk, pool, c);
if (unlikely(err))
- goto err_remove_umem;
+ goto err_remove_pool;
mlx5e_activate_xsk(c);
@@ -132,11 +132,11 @@ err_deactivate:
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
-err_remove_umem:
- mlx5e_xsk_remove_umem(&priv->xsk, ix);
+err_remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
-err_unmap_umem:
- mlx5e_xsk_unmap_umem(priv, umem);
+err_unmap_pool:
+ mlx5e_xsk_unmap_pool(priv, pool);
return err;
@@ -146,7 +146,7 @@ validate_closed:
*/
if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
err = -EINVAL;
- goto err_remove_umem;
+ goto err_remove_pool;
}
return 0;
@@ -154,45 +154,45 @@ validate_closed:
static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
{
- struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params,
+ struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params,
&priv->xsk, ix);
struct mlx5e_channel *c;
- if (unlikely(!umem))
+ if (unlikely(!pool))
return -EINVAL;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
- goto remove_umem;
+ goto remove_pool;
/* XSK RQ and SQ are only created if XDP program is set. */
if (!priv->channels.params.xdp_prog)
- goto remove_umem;
+ goto remove_pool;
c = priv->channels.c[ix];
mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
-remove_umem:
- mlx5e_xsk_remove_umem(&priv->xsk, ix);
- mlx5e_xsk_unmap_umem(priv, umem);
+remove_pool:
+ mlx5e_xsk_remove_pool(&priv->xsk, ix);
+ mlx5e_xsk_unmap_pool(priv, pool);
return 0;
}
-static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem,
+static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool,
u16 ix)
{
int err;
mutex_lock(&priv->state_lock);
- err = mlx5e_xsk_enable_locked(priv, umem, ix);
+ err = mlx5e_xsk_enable_locked(priv, pool, ix);
mutex_unlock(&priv->state_lock);
return err;
}
-static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
+static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix)
{
int err;
@@ -203,7 +203,7 @@ static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
return err;
}
-int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
@@ -212,6 +212,6 @@ int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
return -EINVAL;
- return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) :
- mlx5e_xsk_disable_umem(priv, ix);
+ return pool ? mlx5e_xsk_enable_pool(priv, pool, ix) :
+ mlx5e_xsk_disable_pool(priv, ix);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
new file mode 100644
index 000000000000..dca0010a0866
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_XSK_POOL_H__
+#define __MLX5_EN_XSK_POOL_H__
+
+#include "en.h"
+
+static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->pools)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->pools[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid);
+
+#endif /* __MLX5_EN_XSK_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index a33a1f762c70..8e7b877d8a12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -31,7 +31,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
{
struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
u32 cqe_bcnt32 = cqe_bcnt;
- bool consumed;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -48,12 +47,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
xdp->data_end = xdp->data + cqe_bcnt32;
xdp_set_data_meta_invalid(xdp);
- xsk_buff_dma_sync_for_cpu(xdp);
- prefetch(xdp->data);
-
- rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
- rcu_read_unlock();
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
/* Possible flows:
* - XDP_REDIRECT to XSKMAP:
@@ -70,7 +65,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
* allocated first from the Reuse Ring, so it has enough space.
*/
- if (likely(consumed)) {
+ if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
@@ -88,7 +83,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
u32 cqe_bcnt)
{
struct xdp_buff *xdp = wi->di->xsk;
- bool consumed;
/* wi->offset is not used in this function, because xdp->data and the
* DMA address point directly to the necessary place. Furthermore, the
@@ -99,19 +93,15 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
xdp->data_end = xdp->data + cqe_bcnt;
xdp_set_data_meta_invalid(xdp);
- xsk_buff_dma_sync_for_cpu(xdp);
- prefetch(xdp->data);
+ xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
+ net_prefetch(xdp->data);
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
- rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp);
- rcu_read_unlock();
-
- if (likely(consumed))
+ if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
return NULL; /* page/packet was consumed by XDP */
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index d147b2f13b54..7f88ccf67fdd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -19,10 +19,10 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
-static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info)
{
- dma_info->xsk = xsk_buff_alloc(rq->umem);
+ dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
if (!dma_info->xsk)
return -ENOMEM;
@@ -38,13 +38,13 @@ static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
{
- if (!xsk_umem_uses_need_wakeup(rq->umem))
+ if (!xsk_uses_need_wakeup(rq->xsk_pool))
return alloc_err;
if (unlikely(alloc_err))
- xsk_set_rx_need_wakeup(rq->umem);
+ xsk_set_rx_need_wakeup(rq->xsk_pool);
else
- xsk_clear_rx_need_wakeup(rq->umem);
+ xsk_clear_rx_need_wakeup(rq->xsk_pool);
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index dd9df519d383..4e574ac73019 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -45,7 +45,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
}
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
struct mlx5e_channel *c)
{
struct mlx5e_channel_param *cparam;
@@ -64,7 +64,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (unlikely(err))
goto err_free_cparam;
- err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
+ err = mlx5e_open_rq(c, params, &cparam->rq, xsk, pool, &c->xskrq);
if (unlikely(err))
goto err_close_rx_cq;
@@ -72,13 +72,13 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (unlikely(err))
goto err_close_rq;
- /* Create a separate SQ, so that when the UMEM is disabled, we could
+ /* Create a separate SQ, so that when the buff pool is disabled, we could
* close this SQ safely and stop receiving CQEs. In other case, e.g., if
- * the XDPSQ was used instead, we might run into trouble when the UMEM
+ * the XDPSQ was used instead, we might run into trouble when the buff pool
* is disabled and then reenabled, but the SQ continues receiving CQEs
- * from the old UMEM.
+ * from the old buff pool.
*/
- err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true);
if (unlikely(err))
goto err_close_tx_cq;
@@ -106,8 +106,7 @@ err_free_cparam:
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
- napi_synchronize(&c->napi);
- synchronize_rcu(); /* Sync with the XSK wakeup. */
+ synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
index 0dd11b81c046..ca20f1ff5e39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -12,7 +12,7 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev);
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
struct mlx5e_channel *c);
void mlx5e_close_xsk(struct mlx5e_channel *c);
void mlx5e_activate_xsk(struct mlx5e_channel *c);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 4d892f6cecb3..fb671a457129 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include "tx.h"
-#include "umem.h"
+#include "pool.h"
#include "en/xdp.h"
#include "en/params.h"
#include <net/xdp_sock_drv.h>
@@ -66,9 +66,9 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
- struct xdp_umem *umem = sq->umem;
+ struct xsk_buff_pool *pool = sq->xsk_pool;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
- struct mlx5e_xdp_xmit_data xdptxd;
bool work_done = true;
bool flush = false;
@@ -87,7 +87,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- if (!xsk_umem_consume_tx(umem, &desc)) {
+ if (!xsk_tx_peek_desc(pool, &desc)) {
/* TX will get stuck until something wakes it up by
* triggering NAPI. Currently it's expected that the
* application calls sendto() if there are consumed, but
@@ -96,11 +96,11 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- xdptxd.dma_addr = xsk_buff_raw_get_dma(umem, desc.addr);
- xdptxd.data = xsk_buff_raw_get_data(umem, desc.addr);
+ xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr);
+ xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr);
xdptxd.len = desc.len;
- xsk_buff_raw_dma_sync_for_device(umem, xdptxd.dma_addr, xdptxd.len);
+ xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result);
@@ -119,7 +119,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xmit_xdp_doorbell(sq);
- xsk_umem_consume_tx_done(umem);
+ xsk_tx_release(pool);
}
return !(budget && work_done);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 39fa0a705856..a05085035f23 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -15,13 +15,13 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
{
- if (!xsk_umem_uses_need_wakeup(sq->umem))
+ if (!xsk_uses_need_wakeup(sq->xsk_pool))
return;
if (sq->pc != sq->cc)
- xsk_clear_tx_need_wakeup(sq->umem);
+ xsk_clear_tx_need_wakeup(sq->xsk_pool);
else
- xsk_set_tx_need_wakeup(sq->umem);
+ xsk_set_tx_need_wakeup(sq->xsk_pool);
}
#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
deleted file mode 100644
index bada94973586..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#ifndef __MLX5_EN_XSK_UMEM_H__
-#define __MLX5_EN_XSK_UMEM_H__
-
-#include "en.h"
-
-static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params,
- struct mlx5e_xsk *xsk, u16 ix)
-{
- if (!xsk || !xsk->umems)
- return NULL;
-
- if (unlikely(ix >= params->num_channels))
- return NULL;
-
- return xsk->umems[ix];
-}
-
-struct mlx5e_xsk_param;
-void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk);
-
-/* .ndo_bpf callback. */
-int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid);
-
-int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries);
-
-#endif /* __MLX5_EN_XSK_UMEM_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 110476bdeffb..899b98aca0d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -107,6 +107,9 @@ struct mlx5e_accel_tx_state {
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_accel_tx_tls_state tls;
#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_accel_tx_ipsec_state ipsec;
+#endif
};
static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
@@ -125,27 +128,70 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
}
#endif
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) {
+ if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec)))
+ return false;
+ }
+#endif
+
return true;
}
-static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv,
- struct mlx5e_txqsq *sq,
- struct sk_buff *skb,
+static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ return mlx5e_ipsec_is_tx_flow(&state->ipsec);
+#endif
+
+ return false;
+}
+
+static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
+ struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state))
+ return mlx5e_ipsec_tx_ids_len(&state->ipsec);
+#endif
+
+ return 0;
+}
+
+/* Part of the eseg touched by TX offloads */
+#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
+
+static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (xfrm_offload(skb))
+ mlx5e_ipsec_tx_build_eseg(priv, skb, eseg);
+#endif
+
+#if IS_ENABLED(CONFIG_GENEVE)
+ if (skb->encapsulation)
+ mlx5e_tx_tunnel_accel(skb, eseg);
+#endif
+
+ return true;
+}
+
+static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe *wqe,
- struct mlx5e_accel_tx_state *state)
+ struct mlx5e_accel_tx_state *state,
+ struct mlx5_wqe_inline_seg *inlseg)
{
#ifdef CONFIG_MLX5_EN_TLS
mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
#endif
#ifdef CONFIG_MLX5_EN_IPSEC
- if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
- if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, &wqe->eth, skb)))
- return false;
- }
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) &&
+ state->ipsec.xo && state->ipsec.tailen)
+ mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg);
#endif
-
- return true;
}
static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
index 4cdd9eac647d..97f1594cee11 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -191,7 +191,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in || !ft->g) {
- kvfree(ft->g);
+ kfree(ft->g);
kvfree(in);
return -ENOMEM;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index d39989cddd90..3d45341e2216 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -560,6 +560,9 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
return;
}
+ if (mlx5_is_ipsec_device(mdev))
+ netdev->gso_partial_features |= NETIF_F_GSO_ESP;
+
mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
netdev->features |= NETIF_F_GSO_ESP;
netdev->hw_features |= NETIF_F_GSO_ESP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 0fc8b4d4f4a3..6164c7f59efb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -76,6 +76,7 @@ struct mlx5e_ipsec_stats {
};
struct mlx5e_accel_fs_esp;
+struct mlx5e_ipsec_tx;
struct mlx5e_ipsec {
struct mlx5e_priv *en_priv;
@@ -87,6 +88,7 @@ struct mlx5e_ipsec {
struct mlx5e_ipsec_stats stats;
struct workqueue_struct *wq;
struct mlx5e_accel_fs_esp *rx_fs;
+ struct mlx5e_ipsec_tx *tx_fs;
};
struct mlx5e_ipsec_esn_state {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 429428bbc903..0e45590662a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -34,6 +34,12 @@ struct mlx5e_accel_fs_esp {
struct mlx5e_accel_fs_esp_prot fs_prot[ACCEL_FS_ESP_NUM_TYPES];
};
+struct mlx5e_ipsec_tx {
+ struct mlx5_flow_table *ft;
+ struct mutex mutex; /* Protect IPsec TX steering */
+ u32 refcnt;
+};
+
/* IPsec RX flow steering */
static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
{
@@ -228,8 +234,8 @@ static int rx_fs_create(struct mlx5e_priv *priv,
fs_prot->miss_rule = miss_rule;
out:
- kfree(flow_group_in);
- kfree(spec);
+ kvfree(flow_group_in);
+ kvfree(spec);
return err;
}
@@ -323,6 +329,77 @@ out:
mutex_unlock(&fs_prot->prot_mutex);
}
+/* IPsec TX flow steering */
+static int tx_create(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ priv->fs.egress_ns =
+ mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS_KERNEL);
+ if (!priv->fs.egress_ns)
+ return -EOPNOTSUPP;
+
+ ft_attr.max_fte = NUM_IPSEC_FTE;
+ ft_attr.autogroup.max_num_groups = 1;
+ ft = mlx5_create_auto_grouped_flow_table(priv->fs.egress_ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ netdev_err(priv->netdev, "fail to create ipsec tx ft err=%d\n", err);
+ return err;
+ }
+ ipsec->tx_fs->ft = ft;
+ return 0;
+}
+
+static void tx_destroy(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+ if (IS_ERR_OR_NULL(ipsec->tx_fs->ft))
+ return;
+
+ mlx5_destroy_flow_table(ipsec->tx_fs->ft);
+ ipsec->tx_fs->ft = NULL;
+}
+
+static int tx_ft_get(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+ int err = 0;
+
+ mutex_lock(&tx_fs->mutex);
+ if (tx_fs->refcnt++)
+ goto out;
+
+ err = tx_create(priv);
+ if (err) {
+ tx_fs->refcnt--;
+ goto out;
+ }
+
+out:
+ mutex_unlock(&tx_fs->mutex);
+ return err;
+}
+
+static void tx_ft_put(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec_tx *tx_fs = priv->ipsec->tx_fs;
+
+ mutex_lock(&tx_fs->mutex);
+ if (--tx_fs->refcnt)
+ goto out;
+
+ tx_destroy(priv);
+
+out:
+ mutex_unlock(&tx_fs->mutex);
+}
+
static void setup_fte_common(struct mlx5_accel_esp_xfrm_attrs *attrs,
u32 ipsec_obj_id,
struct mlx5_flow_spec *spec,
@@ -457,6 +534,54 @@ out:
return err;
}
+static int tx_add_rule(struct mlx5e_priv *priv,
+ struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 ipsec_obj_id,
+ struct mlx5e_ipsec_rule *ipsec_rule)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ err = tx_ft_get(priv);
+ if (err)
+ return err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
+
+ /* Add IPsec indicator in metadata_reg_a */
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a,
+ MLX5_ETH_WQE_FT_META_IPSEC);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW |
+ MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT;
+ rule = mlx5_add_flow_rules(priv->ipsec->tx_fs->ft, spec, &flow_act, NULL, 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "fail to add ipsec rule attrs->action=0x%x, err=%d\n",
+ attrs->action, err);
+ goto out;
+ }
+
+ ipsec_rule->rule = rule;
+
+out:
+ kvfree(spec);
+ if (err)
+ tx_ft_put(priv);
+ return err;
+}
+
static void rx_del_rule(struct mlx5e_priv *priv,
struct mlx5_accel_esp_xfrm_attrs *attrs,
struct mlx5e_ipsec_rule *ipsec_rule)
@@ -470,15 +595,27 @@ static void rx_del_rule(struct mlx5e_priv *priv,
rx_ft_put(priv, attrs->is_ipv6 ? ACCEL_FS_ESP6 : ACCEL_FS_ESP4);
}
+static void tx_del_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ipsec_rule *ipsec_rule)
+{
+ mlx5_del_flow_rules(ipsec_rule->rule);
+ ipsec_rule->rule = NULL;
+
+ tx_ft_put(priv);
+}
+
int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
struct mlx5_accel_esp_xfrm_attrs *attrs,
u32 ipsec_obj_id,
struct mlx5e_ipsec_rule *ipsec_rule)
{
- if (!priv->ipsec->rx_fs || attrs->action != MLX5_ACCEL_ESP_ACTION_DECRYPT)
+ if (!priv->ipsec->rx_fs)
return -EOPNOTSUPP;
- return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
+ if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
+ return rx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
+ else
+ return tx_add_rule(priv, attrs, ipsec_obj_id, ipsec_rule);
}
void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
@@ -488,7 +625,18 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
if (!priv->ipsec->rx_fs)
return;
- rx_del_rule(priv, attrs, ipsec_rule);
+ if (attrs->action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
+ rx_del_rule(priv, attrs, ipsec_rule);
+ else
+ tx_del_rule(priv, ipsec_rule);
+}
+
+static void fs_cleanup_tx(struct mlx5e_priv *priv)
+{
+ mutex_destroy(&priv->ipsec->tx_fs->mutex);
+ WARN_ON(priv->ipsec->tx_fs->refcnt);
+ kfree(priv->ipsec->tx_fs);
+ priv->ipsec->tx_fs = NULL;
}
static void fs_cleanup_rx(struct mlx5e_priv *priv)
@@ -507,6 +655,17 @@ static void fs_cleanup_rx(struct mlx5e_priv *priv)
priv->ipsec->rx_fs = NULL;
}
+static int fs_init_tx(struct mlx5e_priv *priv)
+{
+ priv->ipsec->tx_fs =
+ kzalloc(sizeof(struct mlx5e_ipsec_tx), GFP_KERNEL);
+ if (!priv->ipsec->tx_fs)
+ return -ENOMEM;
+
+ mutex_init(&priv->ipsec->tx_fs->mutex);
+ return 0;
+}
+
static int fs_init_rx(struct mlx5e_priv *priv)
{
struct mlx5e_accel_fs_esp_prot *fs_prot;
@@ -532,13 +691,24 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv)
if (!priv->ipsec->rx_fs)
return;
+ fs_cleanup_tx(priv);
fs_cleanup_rx(priv);
}
int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv)
{
+ int err;
+
if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec)
return -EOPNOTSUPP;
- return fs_init_rx(priv);
+ err = fs_init_tx(priv);
+ if (err)
+ return err;
+
+ err = fs_init_rx(priv);
+ if (err)
+ fs_cleanup_tx(priv);
+
+ return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 93a8d68815ad..11e31a3db2be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -34,7 +34,7 @@
#include <crypto/aead.h>
#include <net/xfrm.h>
#include <net/esp.h>
-
+#include "accel/ipsec_offload.h"
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/ipsec.h"
#include "accel/accel.h"
@@ -233,18 +233,94 @@ static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
ntohs(mdata->content.tx.seq));
}
-bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
- struct mlx5_wqe_eth_seg *eseg,
- struct sk_buff *skb)
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg)
+{
+ inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG);
+ esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto);
+}
+
+static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ struct xfrm_state *x,
+ struct xfrm_offload *xo,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ unsigned int blksize, clen, alen, plen;
+ struct crypto_aead *aead;
+ unsigned int tailen;
+
+ ipsec_st->x = x;
+ ipsec_st->xo = xo;
+ if (mlx5_is_ipsec_device(priv->mdev)) {
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ clen = ALIGN(skb->len + 2, blksize);
+ plen = max_t(u32, clen - skb->len, 4);
+ tailen = plen + alen;
+ ipsec_st->plen = plen;
+ ipsec_st->tailen = tailen;
+ }
+
+ return 0;
+}
+
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- struct mlx5e_ipsec_metadata *mdata;
- struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_encap_tmpl *encap;
struct xfrm_state *x;
struct sec_path *sp;
+ u8 l3_proto;
+
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1))
+ return;
+
+ x = xfrm_input_state(skb);
+ if (unlikely(!x))
+ return;
+
+ if (unlikely(!x->xso.offload_handle ||
+ (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))))
+ return;
+
+ mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
- if (!xo)
- return true;
+ l3_proto = (x->props.family == AF_INET) ?
+ ((struct iphdr *)skb_network_header(skb))->protocol :
+ ((struct ipv6hdr *)skb_network_header(skb))->nexthdr;
+
+ if (mlx5_is_ipsec_device(priv->mdev)) {
+ eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+ eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
+ encap = x->encap;
+ if (!encap) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
+ } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
+ }
+ }
+}
+
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct mlx5e_ipsec_metadata *mdata;
+ struct xfrm_state *x;
+ struct sec_path *sp;
sp = skb_sec_path(skb);
if (unlikely(sp->len != 1)) {
@@ -270,15 +346,21 @@ bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
goto drop;
}
- mdata = mlx5e_ipsec_add_metadata(skb);
- if (IS_ERR(mdata)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
- goto drop;
+
+ if (MLX5_CAP_GEN(priv->mdev, fpga)) {
+ mdata = mlx5e_ipsec_add_metadata(skb);
+ if (IS_ERR(mdata)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+ goto drop;
+ }
}
- mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
+
sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
sa_entry->set_iv_op(skb, x, xo);
- mlx5e_ipsec_set_metadata(skb, mdata, xo);
+ if (MLX5_CAP_GEN(priv->mdev, fpga))
+ mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+ mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st);
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index f96e786db158..056dacb612b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -43,6 +43,13 @@
#define MLX5_IPSEC_METADATA_SYNDROM_MASK (0x7F)
#define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF)
+struct mlx5e_accel_tx_ipsec_state {
+ struct xfrm_offload *xo;
+ struct xfrm_state *x;
+ u32 tailen;
+ u32 plen;
+};
+
#ifdef CONFIG_MLX5_EN_IPSEC
struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
@@ -55,16 +62,32 @@ void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
-bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
- struct mlx5_wqe_eth_seg *eseg,
- struct sk_buff *skb);
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st);
+void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_ipsec_state *ipsec_st,
+ struct mlx5_wqe_inline_seg *inlseg);
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5_cqe64 *cqe);
+static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ return ipsec_st->tailen;
+}
+
static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
{
return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata));
}
+
+static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
+{
+ return ipsec_st->x;
+}
+
+void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg);
#else
static inline
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index acf6d80a6bb7..ccaccb9fc2f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -234,7 +234,7 @@ mlx5e_get_ktls_rx_priv_ctx(struct tls_context *tls_ctx)
/* Re-sync */
/* Runs in work context */
-static struct mlx5_wqe_ctrl_seg *
+static int
resync_post_get_progress_params(struct mlx5e_icosq *sq,
struct mlx5e_ktls_offload_context_rx *priv_rx)
{
@@ -253,20 +253,24 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
goto err_out;
}
- pdev = sq->channel->priv->mdev->device;
+ pdev = mlx5_core_dma_dev(sq->channel->priv->mdev);
buf->dma_addr = dma_map_single(pdev, &buf->progress,
PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) {
err = -ENOMEM;
- goto err_out;
+ goto err_free;
}
buf->priv_rx = priv_rx;
BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1);
+
+ spin_lock(&sq->channel->async_icosq_lock);
+
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ spin_unlock(&sq->channel->async_icosq_lock);
err = -ENOSPC;
- goto err_out;
+ goto err_dma_unmap;
}
pi = mlx5e_icosq_get_next_pi(sq, 1);
@@ -294,12 +298,18 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
};
icosq_fill_wi(sq, pi, &wi);
sq->pc++;
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ spin_unlock(&sq->channel->async_icosq_lock);
- return cseg;
+ return 0;
+err_dma_unmap:
+ dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+err_free:
+ kfree(buf);
err_out:
priv_rx->stats->tls_resync_req_skip++;
- return ERR_PTR(err);
+ return err;
}
/* Function is called with elevated refcount.
@@ -309,10 +319,8 @@ static void resync_handle_work(struct work_struct *work)
{
struct mlx5e_ktls_offload_context_rx *priv_rx;
struct mlx5e_ktls_rx_resync_ctx *resync;
- struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5e_channel *c;
struct mlx5e_icosq *sq;
- struct mlx5_wq_cyc *wq;
resync = container_of(work, struct mlx5e_ktls_rx_resync_ctx, work);
priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync);
@@ -324,18 +332,9 @@ static void resync_handle_work(struct work_struct *work)
c = resync->priv->channels.c[priv_rx->rxq];
sq = &c->async_icosq;
- wq = &sq->wq;
-
- spin_lock(&c->async_icosq_lock);
- cseg = resync_post_get_progress_params(sq, priv_rx);
- if (IS_ERR(cseg)) {
+ if (resync_post_get_progress_params(sq, priv_rx))
refcount_dec(&resync->refcnt);
- goto unlock;
- }
- mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
-unlock:
- spin_unlock(&c->async_icosq_lock);
}
static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
@@ -386,16 +385,17 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
struct mlx5e_ktls_offload_context_rx *priv_rx;
struct mlx5e_ktls_rx_resync_ctx *resync;
u8 tracker_state, auth_state, *ctx;
+ struct device *dev;
u32 hw_seq;
priv_rx = buf->priv_rx;
resync = &priv_rx->resync;
-
+ dev = mlx5_core_dma_dev(resync->priv->mdev);
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
goto out;
- dma_sync_single_for_cpu(resync->priv->mdev->device, buf->dma_addr,
- PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
+ dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE,
+ DMA_FROM_DEVICE);
ctx = buf->progress.ctx;
tracker_state = MLX5_GET(tls_progress_params, ctx, record_tracker_state);
@@ -411,6 +411,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
priv_rx->stats->tls_resync_req_end++;
out:
refcount_dec(&resync->refcnt);
+ dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
kfree(buf);
}
@@ -659,7 +660,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx);
set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags);
mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL);
- napi_synchronize(&priv->channels.c[priv_rx->rxq]->napi);
+ synchronize_rcu(); /* Sync with NAPI */
if (!cancel_work_sync(&priv_rx->rule.work))
/* completion is needed, as the priv_rx in the add flow
* is maintained on the wqe info (wi), not on the socket.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index f4861545b236..b140e13fdcc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -345,9 +345,6 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_sq_stats *stats;
struct mlx5e_sq_dma *dma;
- if (!wi->resync_dump_frag_page)
- return;
-
dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
stats = sq->stats;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index ff4c740af10b..7521c9be735b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -29,12 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc);
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ if (unlikely(wi->resync_dump_frag_page)) {
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
+ return true;
+ }
+ return false;
+}
#else
-static inline void
-mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
- struct mlx5e_tx_wqe_info *wi,
- u32 *dma_fifo_cc)
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
{
+ return false;
}
#endif /* CONFIG_MLX5_EN_TLS */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index b0c31d49ff8d..6982b193ee8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -189,12 +189,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
struct mlx5e_tls *tls)
{
u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
- struct mlx5e_tx_wqe *wqe;
struct sync_info info;
struct sk_buff *nskb;
int linear_len = 0;
int headln;
- u16 pi;
int i;
sq->stats->tls_ooo++;
@@ -246,9 +244,7 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
sq->stats->tls_resync_bytes += nskb->len;
mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
cpu_to_be64(info.rcd_sn));
- pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
- wqe = MLX5E_TX_FETCH_WQE(sq, pi);
- mlx5e_sq_xmit(sq, nskb, wqe, pi, true);
+ mlx5e_sq_xmit_simple(sq, nskb, true);
return true;
@@ -274,6 +270,8 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
if (!datalen)
return true;
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
tls_ctx = tls_get_ctx(skb->sk);
if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
goto err_out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
index 01468ec27446..b949b9a7538b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
@@ -35,7 +35,6 @@
#include <net/sock.h>
#include "en.h"
-#include "accel/tls.h"
#include "fpga/sdk.h"
#include "en_accel/tls.h"
@@ -51,9 +50,14 @@ static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc)
+static bool is_tls_atomic_stats(struct mlx5e_priv *priv)
+{
+ return priv->tls && !mlx5_accel_is_ktls_device(priv->mdev);
+}
+
int mlx5e_tls_get_count(struct mlx5e_priv *priv)
{
- if (!priv->tls)
+ if (!is_tls_atomic_stats(priv))
return 0;
return NUM_TLS_SW_COUNTERS;
@@ -63,7 +67,7 @@ int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
{
unsigned int i, idx = 0;
- if (!priv->tls)
+ if (!is_tls_atomic_stats(priv))
return 0;
for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
@@ -77,7 +81,7 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
{
int i, idx = 0;
- if (!priv->tls)
+ if (!is_tls_atomic_stats(priv))
return 0;
for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 08270987c506..d25a56ec6876 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -32,7 +32,7 @@
#include "en.h"
#include "en/port.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
#include "lib/clock.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
@@ -243,7 +243,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
return MLX5E_NUM_PFLAGS;
case ETH_SS_TEST:
return mlx5e_self_test_num(priv);
- fallthrough;
default:
return -EOPNOTSUPP;
}
@@ -1341,6 +1340,14 @@ static int mlx5e_set_tunable(struct net_device *dev,
return err;
}
+static void mlx5e_get_pause_stats(struct net_device *netdev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_pause_get(priv, pause_stats);
+}
+
void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
struct ethtool_pauseparam *pauseparam)
{
@@ -1901,7 +1908,7 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
return 0;
}
-static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1913,7 +1920,7 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
new_channels.params = priv->channels.params;
- MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+ MLX5E_SET_PFLAG(&new_channels.params, flag, enable);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
@@ -1924,6 +1931,16 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
return err;
}
+static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+}
+
+static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable);
+}
+
static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
{ "rx_cqe_moder", set_pflag_rx_cqe_based_moder },
{ "tx_cqe_moder", set_pflag_tx_cqe_based_moder },
@@ -1931,6 +1948,7 @@ static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
{ "rx_striding_rq", set_pflag_rx_striding_rq },
{ "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
{ "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
+ { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe },
};
static int mlx5e_handle_pflag(struct net_device *netdev,
@@ -2033,6 +2051,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.set_rxnfc = mlx5e_set_rxnfc,
.get_tunable = mlx5e_get_tunable,
.set_tunable = mlx5e_set_tunable,
+ .get_pause_stats = mlx5e_get_pause_stats,
.get_pauseparam = mlx5e_get_pauseparam,
.set_pauseparam = mlx5e_set_pauseparam,
.get_ts_info = mlx5e_get_ts_info,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 64d002d92250..1f48f99c0997 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -217,6 +217,9 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
break;
}
+ if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
+ return 0;
+
*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(*rule_p)) {
@@ -397,8 +400,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
- if (priv->fs.vlan.cvlan_filter_disabled &&
- !(priv->netdev->flags & IFF_PROMISC))
+ if (priv->fs.vlan.cvlan_filter_disabled)
mlx5e_add_any_vid_rules(priv);
}
@@ -415,8 +417,12 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
- if (priv->fs.vlan.cvlan_filter_disabled &&
- !(priv->netdev->flags & IFF_PROMISC))
+ WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
+
+ /* must be called after DESTROY bit is set and
+ * set_rx_mode is called and flushed
+ */
+ if (priv->fs.vlan.cvlan_filter_disabled)
mlx5e_del_any_vid_rules(priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 83c9b2bbc4af..b416a8ee2eed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -33,7 +33,7 @@
#include <linux/mlx5/fs.h>
#include "en.h"
#include "en/params.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
struct mlx5e_ethtool_rule {
struct list_head list;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index aebcf73f8546..b3f02aac7f26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -57,7 +57,7 @@
#include "en/monitor_stats.h"
#include "en/health.h"
#include "en/params.h"
-#include "en/xsk/umem.h"
+#include "en/xsk/pool.h"
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
@@ -158,16 +158,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
mutex_unlock(&priv->state_lock);
}
-void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
-{
- int i;
-
- for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--)
- if (mlx5e_nic_stats_grps[i]->update_stats_mask &
- MLX5E_NDO_UPDATE_STATS)
- mlx5e_nic_stats_grps[i]->update_stats(priv);
-}
-
static void mlx5e_update_stats_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
@@ -256,12 +246,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
u64 npages, u8 page_shift,
- struct mlx5_core_mkey *umr_mkey)
+ struct mlx5_core_mkey *umr_mkey,
+ dma_addr_t filler_addr)
{
- int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ struct mlx5_mtt *mtt;
+ int inlen;
void *mkc;
u32 *in;
int err;
+ int i;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
@@ -281,6 +276,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
MLX5_SET(mkc, mkc, translations_octword_size,
MLX5_MTT_OCTW(npages));
MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ MLX5_MTT_OCTW(npages));
+
+ /* Initialize the mkey with all MTTs pointing to a default
+ * page (filler_addr). When the channels are activated, UMR
+ * WQEs will redirect the RX WQEs to the actual memory from
+ * the RQ's pool, while the gaps (wqe_overflow) remain mapped
+ * to the default page.
+ */
+ mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0 ; i < npages ; i++)
+ mtt[i].ptag = cpu_to_be64(filler_addr);
err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
@@ -292,7 +299,8 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
{
u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
- return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+ return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey,
+ rq->wqe_overflow.addr);
}
static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
@@ -360,10 +368,32 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_rq_cqe_err(rq);
}
+static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+ rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
+ if (!rq->wqe_overflow.page)
+ return -ENOMEM;
+
+ rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
+ PAGE_SIZE, rq->buff.map_dir);
+ if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
+ __free_page(rq->wqe_overflow.page);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+ dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
+ rq->buff.map_dir);
+ __free_page(rq->wqe_overflow.page);
+}
+
static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_rq_param *rqp,
struct mlx5e_rq *rq)
{
@@ -389,9 +419,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
rq->xdpsq = &c->rq_xdpsq;
- rq->umem = umem;
+ rq->xsk_pool = xsk_pool;
- if (rq->umem)
+ if (rq->xsk_pool)
rq->stats = &c->priv->channel_stats[c->ix].xskrq;
else
rq->stats = &c->priv->channel_stats[c->ix].rq;
@@ -399,16 +429,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
if (params->xdp_prog)
bpf_prog_inc(params->xdp_prog);
- rq->xdp_prog = params->xdp_prog;
+ RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
rq_xdp_ix = rq->ix;
if (xsk)
rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
if (err < 0)
- goto err_rq_wq_destroy;
+ goto err_rq_xdp_prog;
- rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+ rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
pool_size = 1 << params->log_rq_mtu_frames;
@@ -417,6 +447,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
&rq->wq_ctrl);
if (err)
+ goto err_rq_xdp;
+
+ err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
+ if (err)
goto err_rq_wq_destroy;
rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
@@ -434,18 +468,18 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
- goto err_rq_wq_destroy;
+ goto err_rq_drop_page;
rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
err = mlx5e_rq_alloc_mpwqe_info(rq, c);
if (err)
- goto err_free;
+ goto err_rq_mkey;
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
&rq->wq_ctrl);
if (err)
- goto err_rq_wq_destroy;
+ goto err_rq_xdp;
rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
@@ -460,24 +494,24 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->wqe.frags) {
err = -ENOMEM;
- goto err_free;
+ goto err_rq_wq_destroy;
}
err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
if (err)
- goto err_free;
+ goto err_rq_frags;
rq->mkey_be = c->mkey_be;
}
err = mlx5e_rq_set_handlers(rq, params, xsk);
if (err)
- goto err_free;
+ goto err_free_by_rq_type;
if (xsk) {
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL, NULL);
- xsk_buff_set_rxq_info(rq->umem, &rq->xdp_rxq);
+ xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
} else {
/* Create a page_pool and register it with rxq */
pp_params.order = 0;
@@ -496,13 +530,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
if (IS_ERR(rq->page_pool)) {
err = PTR_ERR(rq->page_pool);
rq->page_pool = NULL;
- goto err_free;
+ goto err_free_by_rq_type;
}
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_PAGE_POOL, rq->page_pool);
}
if (err)
- goto err_free;
+ goto err_free_by_rq_type;
for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -552,38 +586,49 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
return 0;
-err_free:
+err_free_by_rq_type:
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
+err_rq_mkey:
mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+err_rq_drop_page:
+ mlx5e_free_mpwqe_rq_drop_page(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
- kvfree(rq->wqe.frags);
mlx5e_free_di_list(rq);
+err_rq_frags:
+ kvfree(rq->wqe.frags);
}
-
err_rq_wq_destroy:
- if (rq->xdp_prog)
- bpf_prog_put(rq->xdp_prog);
- xdp_rxq_info_unreg(&rq->xdp_rxq);
- page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
+err_rq_xdp:
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+err_rq_xdp_prog:
+ if (params->xdp_prog)
+ bpf_prog_put(params->xdp_prog);
return err;
}
static void mlx5e_free_rq(struct mlx5e_rq *rq)
{
+ struct mlx5e_channel *c = rq->channel;
+ struct bpf_prog *old_prog = NULL;
int i;
- if (rq->xdp_prog)
- bpf_prog_put(rq->xdp_prog);
+ /* drop_rq has neither channel nor xdp_prog. */
+ if (c)
+ old_prog = rcu_dereference_protected(rq->xdp_prog,
+ lockdep_is_held(&c->priv->state_lock));
+ if (old_prog)
+ bpf_prog_put(old_prog);
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
kvfree(rq->mpwqe.info);
mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+ mlx5e_free_mpwqe_rq_drop_page(rq);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
kvfree(rq->wqe.frags);
@@ -816,11 +861,11 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
- struct xdp_umem *umem, struct mlx5e_rq *rq)
+ struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq)
{
int err;
- err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq);
+ err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq);
if (err)
return err;
@@ -848,6 +893,13 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ /* For CQE compression on striding RQ, use stride index provided by
+ * HW if capability is supported.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
+ MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index))
+ __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state);
+
return 0;
err_destroy_rq:
@@ -867,7 +919,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq)
void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
{
clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
- napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
+ synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
}
void mlx5e_close_rq(struct mlx5e_rq *rq)
@@ -925,7 +977,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
struct mlx5e_params *params,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_sq_param *param,
struct mlx5e_xdpsq *sq,
bool is_redirect)
@@ -941,9 +993,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- sq->umem = umem;
+ sq->xsk_pool = xsk_pool;
- sq->stats = sq->umem ?
+ sq->stats = sq->xsk_pool ?
&c->priv->channel_stats[c->ix].xsksq :
is_redirect ?
&c->priv->channel_stats[c->ix].xdpsq :
@@ -1040,6 +1092,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{
kvfree(sq->db.wqe_info);
+ kvfree(sq->db.skb_fifo);
kvfree(sq->db.dma_fifo);
}
@@ -1051,15 +1104,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.dma_fifo)),
GFP_KERNEL, numa);
+ sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.skb_fifo)),
+ GFP_KERNEL, numa);
sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.wqe_info)),
GFP_KERNEL, numa);
- if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+ if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
mlx5e_free_txqsq_db(sq);
return -ENOMEM;
}
sq->dma_fifo_mask = df_sz - 1;
+ sq->skb_fifo_mask = df_sz - 1;
return 0;
}
@@ -1070,6 +1127,12 @@ static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size)
sq->stop_room = mlx5e_tls_get_stop_room(sq);
sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state))
+ /* A MPWQE can take up to the maximum-sized WQE + all the normal
+ * stop room can be taken if a new packet breaks the active
+ * MPWQE session and allocates its WQEs right away.
+ */
+ sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
if (WARN_ON(sq->stop_room >= sq_size)) {
netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
@@ -1111,6 +1174,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (mlx5_accel_is_tls_device(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+ if (param->is_mpw)
+ set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
if (err)
return err;
@@ -1312,12 +1377,10 @@ void mlx5e_tx_disable_queue(struct netdev_queue *txq)
static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
{
- struct mlx5e_channel *c = sq->channel;
struct mlx5_wq_cyc *wq = &sq->wq;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- /* prevent netif_tx_wake_queue */
- napi_synchronize(&c->napi);
+ synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
mlx5e_tx_disable_queue(sq->txq);
@@ -1392,10 +1455,8 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
{
- struct mlx5e_channel *c = icosq->channel;
-
clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
- napi_synchronize(&c->napi);
+ synchronize_rcu(); /* Sync with NAPI. */
}
void mlx5e_close_icosq(struct mlx5e_icosq *sq)
@@ -1408,13 +1469,13 @@ void mlx5e_close_icosq(struct mlx5e_icosq *sq)
}
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect);
+ err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect);
if (err)
return err;
@@ -1474,7 +1535,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
struct mlx5e_channel *c = sq->channel;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- napi_synchronize(&c->napi);
+ synchronize_rcu(); /* Sync with NAPI. */
mlx5e_destroy_sq(c->mdev, sq->sqn);
mlx5e_free_xdpsq_descs(sq);
@@ -1907,7 +1968,7 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam,
- struct xdp_umem *umem,
+ struct xsk_buff_pool *xsk_pool,
struct mlx5e_channel **cp)
{
int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
@@ -1931,7 +1992,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->tstamp = &priv->tstamp;
c->ix = ix;
c->cpu = cpu;
- c->pdev = priv->mdev->device;
+ c->pdev = mlx5_core_dma_dev(priv->mdev);
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
c->num_tc = params->num_tc;
@@ -1946,9 +2007,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
if (unlikely(err))
goto err_napi_del;
- if (umem) {
- mlx5e_build_xsk_param(umem, &xsk);
- err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (xsk_pool) {
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
+ err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
if (unlikely(err))
goto err_close_queues;
}
@@ -2119,7 +2180,7 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
- param->wq.buf_numa_node = dev_to_node(mdev->device);
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp);
}
@@ -2135,7 +2196,7 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
- param->wq.buf_numa_node = dev_to_node(mdev->device);
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
}
void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
@@ -2147,7 +2208,7 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn);
- param->wq.buf_numa_node = dev_to_node(priv->mdev->device);
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
}
static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
@@ -2163,6 +2224,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
MLX5_SET(sqc, sqc, allow_swp, allow_swp);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
mlx5e_build_tx_cq_param(priv, params, &param->cqp);
}
@@ -2182,6 +2244,7 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ bool hw_stridx = false;
void *cqc = param->cqc;
u8 log_cq_size;
@@ -2189,6 +2252,7 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+ hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
log_cq_size = params->log_rq_mtu_frames;
@@ -2196,7 +2260,8 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
- MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
MLX5_SET(cqc, cqc, cqe_comp_en, 1);
}
@@ -2309,12 +2374,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
mlx5e_build_channel_param(priv, &chs->params, cparam);
for (i = 0; i < chs->num; i++) {
- struct xdp_umem *umem = NULL;
+ struct xsk_buff_pool *xsk_pool = NULL;
if (chs->params.xdp_prog)
- umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i);
+ xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
- err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]);
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]);
if (err)
goto err_close_channels;
}
@@ -3181,8 +3246,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
struct mlx5e_cq *cq,
struct mlx5e_cq_param *param)
{
- param->wq.buf_numa_node = dev_to_node(mdev->device);
- param->wq.db_numa_node = dev_to_node(mdev->device);
+ param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+ param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
return mlx5e_alloc_cq_common(mdev, param, cq);
}
@@ -3567,6 +3632,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
s->rx_packets += rq_stats->packets + xskrq_stats->packets;
s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes;
+ s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets;
for (j = 0; j < priv->max_opened_tc; j++) {
struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -3582,7 +3648,6 @@ void
mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_vport_stats *vstats = &priv->stats.vport;
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
/* In switchdev mode, monitor counters doesn't monitor
@@ -3617,12 +3682,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
stats->rx_frame_errors;
stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
-
- /* vport multicast also counts packets that are dropped due to steering
- * or rx out of buffer
- */
- stats->multicast =
- VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
}
static void mlx5e_set_rx_mode(struct net_device *dev)
@@ -3892,13 +3951,14 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
u16 ix;
for (ix = 0; ix < chs->params.num_channels; ix++) {
- struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix);
+ struct xsk_buff_pool *xsk_pool =
+ mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
struct mlx5e_xsk_param xsk;
- if (!umem)
+ if (!xsk_pool)
continue;
- mlx5e_build_xsk_param(umem, &xsk);
+ mlx5e_build_xsk_param(xsk_pool, &xsk);
if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
@@ -4191,6 +4251,21 @@ int mlx5e_get_vf_stats(struct net_device *dev,
}
#endif
+static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
+ struct sk_buff *skb)
+{
+ switch (skb->inner_protocol) {
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_TEB):
+ return true;
+ case htons(ETH_P_MPLS_UC):
+ case htons(ETH_P_MPLS_MC):
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
+ }
+ return false;
+}
+
static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
struct sk_buff *skb,
netdev_features_t features)
@@ -4213,7 +4288,9 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
switch (proto) {
case IPPROTO_GRE:
- return features;
+ if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
+ return features;
+ break;
case IPPROTO_IPIP:
case IPPROTO_IPV6:
if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))
@@ -4330,6 +4407,16 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return 0;
}
+static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
+{
+ struct bpf_prog *old_prog;
+
+ old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
+ lockdep_is_held(&rq->channel->priv->state_lock));
+ if (old_prog)
+ bpf_prog_put(old_prog);
+}
+
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4388,29 +4475,10 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
*/
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
- bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
-
- clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
- if (xsk_open)
- clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
- napi_synchronize(&c->napi);
- /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
-
- old_prog = xchg(&c->rq.xdp_prog, prog);
- if (old_prog)
- bpf_prog_put(old_prog);
-
- if (xsk_open) {
- old_prog = xchg(&c->xskrq.xdp_prog, prog);
- if (old_prog)
- bpf_prog_put(old_prog);
- }
- set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
- if (xsk_open)
- set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
- /* napi_schedule in case we have missed anything */
- napi_schedule(&c->napi);
+ mlx5e_rq_replace_xdp_prog(&c->rq, prog);
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
}
unlock:
@@ -4423,8 +4491,8 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return mlx5e_xdp_set(dev, xdp->prog);
- case XDP_SETUP_XSK_UMEM:
- return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
+ case XDP_SETUP_XSK_POOL:
+ return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool,
xdp->xsk.queue_id);
default:
return -EINVAL;
@@ -4715,6 +4783,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv,
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE,
+ MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
/* XDP SQ */
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
@@ -5200,7 +5270,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.enable = mlx5e_nic_enable,
.disable = mlx5e_nic_disable,
.update_rx = mlx5e_update_nic_rx,
- .update_stats = mlx5e_update_ndo_stats,
+ .update_stats = mlx5e_stats_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
.rx_handlers = &mlx5e_rx_handlers_nic,
.max_tc = MLX5E_MAX_NUM_TC,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index e13e5d1b3eae..67247c33b9fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -39,7 +39,6 @@
#include <net/ipv6_stubs.h>
#include "eswitch.h"
-#include "esw/chains.h"
#include "en.h"
#include "en_rep.h"
#include "en/txrx.h"
@@ -288,6 +287,14 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
return mlx5e_ethtool_get_rxfh_indir_size(priv);
}
+static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev,
+ struct ethtool_pause_stats *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_stats_pause_get(priv, stats);
+}
+
static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *pauseparam)
{
@@ -362,23 +369,11 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
.set_rxfh = mlx5e_set_rxfh,
.get_rxnfc = mlx5e_get_rxnfc,
.set_rxnfc = mlx5e_set_rxnfc,
+ .get_pause_stats = mlx5e_uplink_rep_get_pause_stats,
.get_pauseparam = mlx5e_uplink_rep_get_pauseparam,
.set_pauseparam = mlx5e_uplink_rep_set_pauseparam,
};
-static void mlx5e_rep_get_port_parent_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid)
-{
- struct mlx5e_priv *priv;
- u64 parent_id;
-
- priv = netdev_priv(dev);
-
- parent_id = mlx5_query_nic_system_image_guid(priv->mdev);
- ppid->id_len = sizeof(parent_id);
- memcpy(ppid->id, &parent_id, sizeof(parent_id));
-}
-
static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep)
{
@@ -603,12 +598,13 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan
return 0;
}
-static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *dev)
+static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_core_dev *dev = priv->mdev;
- return &rpriv->dl_port;
+ return mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
}
static int mlx5e_rep_change_carrier(struct net_device *dev, bool new_carrier)
@@ -1171,7 +1167,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.cleanup_tx = mlx5e_cleanup_rep_tx,
.enable = mlx5e_rep_enable,
.update_rx = mlx5e_update_rep_rx,
- .update_stats = mlx5e_update_ndo_stats,
+ .update_stats = mlx5e_stats_update_ndo_stats,
.rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = 1,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
@@ -1189,7 +1185,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.enable = mlx5e_uplink_rep_enable,
.disable = mlx5e_uplink_rep_disable,
.update_rx = mlx5e_update_rep_rx,
- .update_stats = mlx5e_update_ndo_stats,
+ .update_stats = mlx5e_stats_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
.rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = MLX5E_MAX_NUM_TC,
@@ -1198,63 +1194,13 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.stats_grps_num = mlx5e_ul_rep_stats_grps_num,
};
-static bool
-is_devlink_port_supported(const struct mlx5_core_dev *dev,
- const struct mlx5e_rep_priv *rpriv)
-{
- return rpriv->rep->vport == MLX5_VPORT_UPLINK ||
- rpriv->rep->vport == MLX5_VPORT_PF ||
- mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport);
-}
-
-static int register_devlink_port(struct mlx5_core_dev *dev,
- struct mlx5e_rep_priv *rpriv)
-{
- struct devlink *devlink = priv_to_devlink(dev);
- struct mlx5_eswitch_rep *rep = rpriv->rep;
- struct devlink_port_attrs attrs = {};
- struct netdev_phys_item_id ppid = {};
- unsigned int dl_port_index = 0;
- u16 pfnum;
-
- if (!is_devlink_port_supported(dev, rpriv))
- return 0;
-
- mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
- dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, rep->vport);
- pfnum = PCI_FUNC(dev->pdev->devfn);
- if (rep->vport == MLX5_VPORT_UPLINK) {
- attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
- attrs.phys.port_number = pfnum;
- memcpy(attrs.switch_id.id, &ppid.id[0], ppid.id_len);
- attrs.switch_id.id_len = ppid.id_len;
- devlink_port_attrs_set(&rpriv->dl_port, &attrs);
- } else if (rep->vport == MLX5_VPORT_PF) {
- memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len);
- rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len;
- devlink_port_attrs_pci_pf_set(&rpriv->dl_port, pfnum);
- } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) {
- memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len);
- rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len;
- devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
- pfnum, rep->vport - 1);
- }
- return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
-}
-
-static void unregister_devlink_port(struct mlx5_core_dev *dev,
- struct mlx5e_rep_priv *rpriv)
-{
- if (is_devlink_port_supported(dev, rpriv))
- devlink_port_unregister(&rpriv->dl_port);
-}
-
/* e-Switch vport representors */
static int
mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
const struct mlx5e_profile *profile;
struct mlx5e_rep_priv *rpriv;
+ struct devlink_port *dl_port;
struct net_device *netdev;
int nch, err;
@@ -1304,28 +1250,19 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto err_detach_netdev;
}
- err = register_devlink_port(dev, rpriv);
- if (err) {
- netdev_warn(netdev, "Failed to register devlink port %d\n",
- rep->vport);
- goto err_neigh_cleanup;
- }
-
err = register_netdev(netdev);
if (err) {
netdev_warn(netdev,
"Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_devlink_cleanup;
+ goto err_neigh_cleanup;
}
- if (is_devlink_port_supported(dev, rpriv))
- devlink_port_type_eth_set(&rpriv->dl_port, netdev);
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_eth_set(dl_port, netdev);
return 0;
-err_devlink_cleanup:
- unregister_devlink_port(dev, rpriv);
-
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
@@ -1349,12 +1286,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *dev = priv->mdev;
+ struct devlink_port *dl_port;
void *ppriv = priv->ppriv;
- if (is_devlink_port_supported(dev, rpriv))
- devlink_port_type_clear(&rpriv->dl_port);
+ dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+ if (dl_port)
+ devlink_port_type_clear(dl_port);
unregister_netdev(netdev);
- unregister_devlink_port(dev, rpriv);
mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
if (rep->vport == MLX5_VPORT_UPLINK)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 622c27ae4ac7..9020d1419bcf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -101,7 +101,6 @@ struct mlx5e_rep_priv {
struct list_head vport_sqs_list;
struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
struct rtnl_link_stats64 prev_vf_vport_stats;
- struct devlink_port dl_port;
};
static inline
@@ -135,12 +134,6 @@ struct mlx5e_neigh_hash_entry {
/* encap list sharing the same neigh */
struct list_head encap_list;
- /* valid only when the neigh reference is taken during
- * neigh_update_work workqueue callback.
- */
- struct neighbour *n;
- struct work_struct neigh_update_work;
-
/* neigh hash entry can be deleted only when the refcount is zero.
* refcount is needed to avoid neigh hash entry removal by TC, while
* it's used by the neigh notification call.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 65828af120b7..599f5b5ebc97 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
@@ -53,6 +52,7 @@
#include "en/xsk/rx.h"
#include "en/health.h"
#include "en/params.h"
+#include "en/txrx.h"
static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
@@ -138,8 +138,17 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
title->check_sum = mini_cqe->checksum;
title->op_own &= 0xf0;
title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz);
- title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
+ /* state bit set implies linked-list striding RQ wq type and
+ * HW stride index capability supported
+ */
+ if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) {
+ title->wqe_counter = mini_cqe->stridx;
+ return;
+ }
+
+ /* HW stride index capability not supported */
+ title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
else
@@ -281,8 +290,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info)
{
- if (rq->umem)
- return mlx5e_xsk_page_alloc_umem(rq, dma_info);
+ if (rq->xsk_pool)
+ return mlx5e_xsk_page_alloc_pool(rq, dma_info);
else
return mlx5e_page_alloc_pool(rq, dma_info);
}
@@ -313,7 +322,7 @@ static inline void mlx5e_page_release(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info,
bool recycle)
{
- if (rq->umem)
+ if (rq->xsk_pool)
/* The `recycle` parameter is ignored, and the page is always
* put into the Reuse Ring, because there is no way to return
* the page to the userspace when the interface goes down.
@@ -400,14 +409,14 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
int err;
int i;
- if (rq->umem) {
+ if (rq->xsk_pool) {
int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
/* Check in advance that we have enough frames, instead of
* allocating one-by-one, failing and moving frames to the
* Reuse Ring.
*/
- if (unlikely(!xsk_buff_can_alloc(rq->umem, pages_desired)))
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, pages_desired)))
return -ENOMEM;
}
@@ -505,8 +514,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
/* Check in advance that we have enough frames, instead of allocating
* one-by-one, failing and moving frames to the Reuse Ring.
*/
- if (rq->umem &&
- unlikely(!xsk_buff_can_alloc(rq->umem, MLX5_MPWRQ_PAGES_PER_WQE))) {
+ if (rq->xsk_pool &&
+ unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) {
err = -ENOMEM;
goto err;
}
@@ -754,7 +763,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
* the driver when it refills the Fill Ring.
* 2. Otherwise, busy poll by rescheduling the NAPI poll.
*/
- if (unlikely(alloc_err == -ENOMEM && rq->umem))
+ if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool))
return true;
return false;
@@ -1080,6 +1089,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
mlx5e_enable_ecn(rq, skb);
skb->protocol = eth_type_trans(skb, netdev);
+
+ if (unlikely(mlx5e_skb_is_multicast(skb)))
+ stats->mcast_packets++;
}
static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
@@ -1132,7 +1144,6 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct xdp_buff xdp;
struct sk_buff *skb;
void *va, *data;
- bool consumed;
u32 frag_size;
va = page_address(di->page) + wi->offset;
@@ -1141,14 +1152,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
- prefetchw(va); /* xdp_frame data area */
- prefetch(data);
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetch(data);
- rcu_read_lock();
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
- consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp);
- rcu_read_unlock();
- if (consumed)
+ if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
return NULL; /* page/packet was consumed by XDP */
rx_headroom = xdp.data - xdp.data_hard_start;
@@ -1184,7 +1192,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
while (byte_cnt) {
u16 frag_consumed_bytes =
@@ -1252,6 +1260,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (mlx5e_cqe_regb_chain(cqe))
+ if (!mlx5e_tc_update_skb(cqe, skb))
+ goto free_wqe;
+
napi_gro_receive(rq->cq.napi, skb);
free_wqe:
@@ -1399,7 +1412,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
if (unlikely(frag_offset >= PAGE_SIZE)) {
di++;
@@ -1438,7 +1451,6 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
struct sk_buff *skb;
void *va, *data;
u32 frag_size;
- bool consumed;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -1452,14 +1464,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
frag_size, DMA_FROM_DEVICE);
- prefetchw(va); /* xdp_frame data area */
- prefetch(data);
+ net_prefetchw(va); /* xdp_frame data area */
+ net_prefetch(data);
- rcu_read_lock();
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
- consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp);
- rcu_read_unlock();
- if (consumed) {
+ if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
@@ -1517,6 +1526,11 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
goto mpwrq_cqe_out;
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (mlx5e_cqe_regb_chain(cqe))
+ if (!mlx5e_tc_update_skb(cqe, skb))
+ goto mpwrq_cqe_out;
+
napi_gro_receive(rq->cq.napi, skb);
mpwrq_cqe_out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 46790216ce86..ce8ab1f01876 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <net/udp.h>
@@ -115,7 +114,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
return NULL;
}
- prefetchw(skb->data);
+ net_prefetchw(skb->data);
skb_reserve(skb, NET_IP_ALIGN);
/* Reserve for ethernet and IP header */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index e3b2f59408e6..78f6a6f0a7e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -54,6 +54,18 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv)
return total;
}
+void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv)
+{
+ mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+ const unsigned int num_stats_grps = stats_grps_num(priv);
+ int i;
+
+ for (i = num_stats_grps - 1; i >= 0; i--)
+ if (stats_grps[i]->update_stats &&
+ stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS)
+ stats_grps[i]->update_stats(priv);
+}
+
void mlx5e_stats_update(struct mlx5e_priv *priv)
{
mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
@@ -98,6 +110,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
@@ -353,6 +367,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
s->tx_nop += sq_stats->nop;
+ s->tx_mpwqe_blks += sq_stats->mpwqe_blks;
+ s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts;
s->tx_queue_stopped += sq_stats->stopped;
s->tx_queue_wake += sq_stats->wake;
s->tx_queue_dropped += sq_stats->dropped;
@@ -677,6 +693,35 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
+#define MLX5E_READ_CTR64_BE_F(ptr, c) \
+ be64_to_cpu(*(__be64 *)((char *)ptr + \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)))
+
+void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
+ struct ethtool_pause_stats *pause_stats)
+{
+ u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3,
+ sz, MLX5_REG_PPCNT, 0, 0);
+
+ pause_stats->tx_pause_frames =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ a_pause_mac_ctrl_frames_transmitted);
+ pause_stats->rx_pause_frames =
+ MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+ a_pause_mac_ctrl_frames_received);
+}
+
#define PPORT_2863_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
@@ -1527,6 +1572,8 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 2e1cca1923b9..162daaadb0d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -103,6 +103,10 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv);
void mlx5e_stats_update(struct mlx5e_priv *priv);
void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx);
void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data);
+void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv);
+
+void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
+ struct ethtool_pause_stats *pause_stats);
/* Concrete NIC Stats */
@@ -117,8 +121,11 @@ struct mlx5e_sw_stats {
u64 tx_tso_inner_bytes;
u64 tx_added_vlan_packets;
u64 tx_nop;
+ u64 tx_mpwqe_blks;
+ u64 tx_mpwqe_pkts;
u64 rx_lro_packets;
u64 rx_lro_bytes;
+ u64 rx_mcast_packets;
u64 rx_ecn_mark;
u64 rx_removed_vlan_packets;
u64 rx_csum_unnecessary;
@@ -298,6 +305,7 @@ struct mlx5e_rq_stats {
u64 csum_none;
u64 lro_packets;
u64 lro_bytes;
+ u64 mcast_packets;
u64 ecn_mark;
u64 removed_vlan_packets;
u64 xdp_drop;
@@ -345,6 +353,8 @@ struct mlx5e_sq_stats {
u64 csum_partial_inner;
u64 added_vlan_packets;
u64 nop;
+ u64 mpwqe_blks;
+ u64 mpwqe_pkts;
#ifdef CONFIG_MLX5_EN_TLS
u64 tls_encrypted_packets;
u64 tls_encrypted_bytes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fd53d101d8fd..e3a968e9e2a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -57,7 +57,6 @@
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
-#include "esw/chains.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
@@ -66,20 +65,11 @@
#include "en/mod_hdr.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
+#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
+#define nic_chains(priv) ((priv)->fs.tc.chains)
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
-
-struct mlx5_nic_flow_attr {
- u32 action;
- u32 flow_tag;
- struct mlx5_modify_hdr *modify_hdr;
- u32 hairpin_tirn;
- u8 match_level;
- struct mlx5_flow_table *hairpin_ft;
- struct mlx5_fc *counter;
-};
-
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
enum {
@@ -153,11 +143,7 @@ struct mlx5e_tc_flow {
struct rcu_head rcu_head;
struct completion init_done;
int tunnel_id; /* the mapped tunnel id of this flow */
-
- union {
- struct mlx5_esw_flow_attr esw_attr[0];
- struct mlx5_nic_flow_attr nic_attr[0];
- };
+ struct mlx5_flow_attr *attr;
};
struct mlx5e_tc_flow_parse_attr {
@@ -170,7 +156,7 @@ struct mlx5e_tc_flow_parse_attr {
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
-#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
[CHAIN_TO_REG] = {
@@ -191,6 +177,16 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
[MARK_TO_REG] = mark_to_reg_ct,
[LABELS_TO_REG] = labels_to_reg_ct,
[FTEID_TO_REG] = fteid_to_reg_ct,
+ /* For NIC rules we store the retore metadata directly
+ * into reg_b that is passed to SW since we don't
+ * jump between steering domains.
+ */
+ [NIC_CHAIN_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
+ .moffset = 0,
+ .mlen = 2,
+ },
+ [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
};
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
@@ -244,6 +240,7 @@ mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
enum mlx5e_tc_attr_to_reg type,
u32 data)
{
@@ -253,8 +250,7 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
char *modact;
int err;
- err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB,
- mod_hdr_acts);
+ err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
if (err)
return err;
@@ -275,6 +271,54 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
return 0;
}
+#define esw_offloads_mode(esw) (mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS)
+
+static struct mlx5_tc_ct_priv *
+get_ct_priv(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (esw_offloads_mode(esw)) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ return uplink_priv->ct_priv;
+ }
+
+ return priv->fs.tc.ct;
+}
+
+struct mlx5_flow_handle *
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (esw_offloads_mode(esw))
+ return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+ return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
+}
+
+void
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (esw_offloads_mode(esw)) {
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+
+ return;
+ }
+
+ mlx5e_del_offloaded_nic_rule(priv, rule, attr);
+}
+
struct mlx5e_hairpin {
struct mlx5_hairpin *pair;
@@ -370,7 +414,7 @@ static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
MLX5E_TC_FLOW_FLAG_##flag)
-static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, ESWITCH);
}
@@ -415,10 +459,7 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
return PTR_ERR(mh);
modify_hdr = mlx5e_mod_hdr_get(mh);
- if (mlx5e_is_eswitch_flow(flow))
- flow->esw_attr->modify_hdr = modify_hdr;
- else
- flow->nic_attr->modify_hdr = modify_hdr;
+ flow->attr->modify_hdr = modify_hdr;
flow->mh = mh;
return 0;
@@ -858,9 +899,9 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
attach_flow:
if (hpe->hp->num_channels > 1) {
flow_flag_set(flow, HAIRPIN_RSS);
- flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+ flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
} else {
- flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
+ flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
}
flow->hpe = hpe;
@@ -890,129 +931,212 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
flow->hpe = NULL;
}
-static int
-mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr)
{
- struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
- struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ struct mlx5_fs_chains *nic_chains = nic_chains(priv);
+ struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
.flags = FLOW_ACT_NO_APPEND,
};
- struct mlx5_fc *counter = NULL;
- int err, dest_ix = 0;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_table *ft;
+ int dest_ix = 0;
flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
- flow_context->flow_tag = attr->flow_tag;
-
- if (flow_flag_test(flow, HAIRPIN)) {
- err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
- if (err)
- return err;
+ flow_context->flow_tag = nic_attr->flow_tag;
- if (flow_flag_test(flow, HAIRPIN_RSS)) {
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[dest_ix].ft = attr->hairpin_ft;
- } else {
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest[dest_ix].tir_num = attr->hairpin_tirn;
- }
+ if (attr->dest_ft) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = attr->dest_ft;
+ dest_ix++;
+ } else if (nic_attr->hairpin_ft) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = nic_attr->hairpin_ft;
+ dest_ix++;
+ } else if (nic_attr->hairpin_tirn) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
dest_ix++;
} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[dest_ix].ft = priv->fs.vlan.ft.t;
+ if (attr->dest_chain) {
+ dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
+ attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+ if (IS_ERR(dest[dest_ix].ft))
+ return ERR_CAST(dest[dest_ix].ft);
+ } else {
+ dest[dest_ix].ft = priv->fs.vlan.ft.t;
+ }
+ dest_ix++;
+ }
+
+ if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
dest_ix++;
}
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ flow_act.modify_hdr = attr->modify_hdr;
+
+ mutex_lock(&tc->t_lock);
+ if (IS_ERR_OR_NULL(tc->t)) {
+ /* Create the root table here if doesn't exist yet */
+ tc->t =
+ mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
+
+ if (IS_ERR(tc->t)) {
+ mutex_unlock(&tc->t_lock);
+ netdev_err(priv->netdev,
+ "Failed to create tc offload table\n");
+ rule = ERR_CAST(priv->fs.tc.t);
+ goto err_ft_get;
+ }
+ }
+ mutex_unlock(&tc->t_lock);
+
+ if (attr->chain || attr->prio)
+ ft = mlx5_chains_get_table(nic_chains,
+ attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+ else
+ ft = attr->ft;
+
+ if (IS_ERR(ft)) {
+ rule = ERR_CAST(ft);
+ goto err_ft_get;
+ }
+
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
+ rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act, dest, dest_ix);
+ if (IS_ERR(rule))
+ goto err_rule;
+
+ return rule;
+
+err_rule:
+ if (attr->chain || attr->prio)
+ mlx5_chains_put_table(nic_chains,
+ attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+err_ft_get:
+ if (attr->dest_chain)
+ mlx5_chains_put_table(nic_chains,
+ attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
+
+ return ERR_CAST(rule);
+}
+
+static int
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_fc *counter = NULL;
+ int err;
+
+ if (flow_flag_test(flow, HAIRPIN)) {
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
+ if (err)
+ return err;
+ }
+
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(dev, true);
if (IS_ERR(counter))
return PTR_ERR(counter);
- dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[dest_ix].counter_id = mlx5_fc_id(counter);
- dest_ix++;
attr->counter = counter;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- flow_act.modify_hdr = attr->modify_hdr;
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
if (err)
return err;
}
- mutex_lock(&priv->fs.tc.t_lock);
- if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
- struct mlx5_flow_table_attr ft_attr = {};
- int tc_grp_size, tc_tbl_size, tc_num_grps;
- u32 max_flow_counter;
-
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
-
- tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
-
- tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
- BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
- tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS;
-
- ft_attr.prio = MLX5E_TC_PRIO;
- ft_attr.max_fte = tc_tbl_size;
- ft_attr.level = MLX5E_TC_FT_LEVEL;
- ft_attr.autogroup.max_num_groups = tc_num_grps;
- priv->fs.tc.t =
- mlx5_create_auto_grouped_flow_table(priv->fs.ns,
- &ft_attr);
- if (IS_ERR(priv->fs.tc.t)) {
- mutex_unlock(&priv->fs.tc.t_lock);
- NL_SET_ERR_MSG_MOD(extack,
- "Failed to create tc offload table");
- netdev_err(priv->netdev,
- "Failed to create tc offload table\n");
- return PTR_ERR(priv->fs.tc.t);
- }
- }
+ if (flow_flag_test(flow, CT))
+ flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
+ attr, &parse_attr->mod_hdr_acts);
+ else
+ flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
+ attr);
- if (attr->match_level != MLX5_MATCH_NONE)
- parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+ return PTR_ERR_OR_ZERO(flow->rule[0]);
+}
- flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
- &flow_act, dest, dest_ix);
- mutex_unlock(&priv->fs.tc.t_lock);
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_fs_chains *nic_chains = nic_chains(priv);
- return PTR_ERR_OR_ZERO(flow->rule[0]);
+ mlx5_del_flow_rules(rule);
+
+ if (attr->chain || attr->prio)
+ mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
+ MLX5E_TC_FT_LEVEL);
+
+ if (attr->dest_chain)
+ mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
+ MLX5E_TC_FT_LEVEL);
}
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
- struct mlx5_fc *counter = NULL;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
+
+ flow_flag_clear(flow, OFFLOADED);
- counter = attr->counter;
- if (!IS_ERR_OR_NULL(flow->rule[0]))
- mlx5_del_flow_rules(flow->rule[0]);
- mlx5_fc_destroy(priv->mdev, counter);
+ if (flow_flag_test(flow, CT))
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+ else if (!IS_ERR_OR_NULL(flow->rule[0]))
+ mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
+ /* Remove root table if no rules are left to avoid
+ * extra steering hops.
+ */
mutex_lock(&priv->fs.tc.t_lock);
- if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
- mlx5_destroy_flow_table(priv->fs.tc.t);
+ if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
+ !IS_ERR_OR_NULL(tc->t)) {
+ mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
priv->fs.tc.t = NULL;
}
mutex_unlock(&priv->fs.tc.t_lock);
+ kvfree(attr->parse_attr);
+
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
+ mlx5_fc_destroy(priv->mdev, attr->counter);
+
if (flow_flag_test(flow, HAIRPIN))
mlx5e_hairpin_flow_del(priv, flow);
+
+ kfree(flow->attr);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
@@ -1035,7 +1159,7 @@ static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
struct mlx5_flow_handle *rule;
@@ -1043,7 +1167,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
if (flow_flag_test(flow, CT)) {
mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
- return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr,
+ return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
+ flow, spec, attr,
mod_hdr_acts);
}
@@ -1051,7 +1176,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
if (IS_ERR(rule))
return rule;
- if (attr->split_count) {
+ if (attr->esw_attr->split_count) {
flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
if (IS_ERR(flow->rule[1])) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
@@ -1065,16 +1190,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
static void
mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
flow_flag_clear(flow, OFFLOADED);
if (flow_flag_test(flow, CT)) {
- mlx5_tc_ct_delete_flow(flow->priv, flow, attr);
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
return;
}
- if (attr->split_count)
+ if (attr->esw_attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
@@ -1085,18 +1210,24 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec)
{
- struct mlx5_esw_flow_attr slow_attr;
+ struct mlx5_flow_attr *slow_attr;
struct mlx5_flow_handle *rule;
- memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr));
- slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr.split_count = 0;
- slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!slow_attr)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ slow_attr->esw_attr->split_count = 0;
+ slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, &slow_attr);
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
if (!IS_ERR(rule))
flow_flag_set(flow, SLOW);
+ kfree(slow_attr);
+
return rule;
}
@@ -1104,14 +1235,21 @@ static void
mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow)
{
- struct mlx5_esw_flow_attr slow_attr;
+ struct mlx5_flow_attr *slow_attr;
- memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr));
- slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr.split_count = 0;
- slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
- mlx5e_tc_unoffload_fdb_rules(esw, flow, &slow_attr);
+ slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!slow_attr) {
+ mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
+ return;
+ }
+
+ memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ slow_attr->esw_attr->split_count = 0;
+ slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
flow_flag_clear(flow, SLOW);
+ kfree(slow_attr);
}
/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
@@ -1169,9 +1307,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
struct net_device *out_dev, *encap_dev = NULL;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
@@ -1180,7 +1319,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
int err = 0;
int out_index;
- if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) {
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) {
NL_SET_ERR_MSG_MOD(extack,
"E-switch priorities unsupported, upgrade FW");
return -EOPNOTSUPP;
@@ -1191,14 +1330,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
* FDB_FT_CHAIN which is outside tc range.
* See mlx5e_rep_setup_ft_cb().
*/
- max_chain = mlx5_esw_chains_get_chain_range(esw);
+ max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
NL_SET_ERR_MSG_MOD(extack,
"Requested chain is out of supported range");
return -EOPNOTSUPP;
}
- max_prio = mlx5_esw_chains_get_prio_range(esw);
+ max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
if (attr->prio > max_prio) {
NL_SET_ERR_MSG_MOD(extack,
"Requested priority is out of supported range");
@@ -1211,10 +1350,13 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return err;
}
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
int mirred_ifindex;
- if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
continue;
mirred_ifindex = parse_attr->mirred_ifindex[out_index];
@@ -1227,8 +1369,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
- attr->dests[out_index].rep = rpriv->rep;
- attr->dests[out_index].mdev = out_priv->mdev;
+ esw_attr->dests[out_index].rep = rpriv->rep;
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
}
err = mlx5_eswitch_add_vlan_action(esw, attr);
@@ -1244,7 +1386,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(attr->counter_dev, true);
+ counter = mlx5_fc_create(esw_attr->counter_dev, true);
if (IS_ERR(counter))
return PTR_ERR(counter);
@@ -1270,7 +1412,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
- struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
+ struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
void *headers_v = MLX5_ADDR_OF(fte_match_param,
spec->match_value,
misc_parameters_3);
@@ -1285,16 +1427,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
int out_index;
mlx5e_put_flow_tunnel_id(flow);
- if (flow_flag_test(flow, NOT_READY)) {
+ if (flow_flag_test(flow, NOT_READY))
remove_unready_flow(flow);
- kvfree(attr->parse_attr);
- return;
- }
if (mlx5e_is_offloaded_flow(flow)) {
if (flow_flag_test(flow, SLOW))
@@ -1309,20 +1448,24 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
- if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
+ if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
mlx5e_detach_encap(priv, flow, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
kvfree(attr->parse_attr);
+ mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
+
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(attr->counter_dev, attr->counter);
+ mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
+
+ kfree(flow->attr);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -1332,6 +1475,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
struct mlx5_flow_spec *spec;
struct mlx5e_tc_flow *flow;
int err;
@@ -1354,8 +1498,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
if (!mlx5e_is_offloaded_flow(flow))
continue;
- esw_attr = flow->esw_attr;
- spec = &esw_attr->parse_attr->spec;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
@@ -1375,7 +1520,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
if (!all_flow_encaps_valid)
continue;
/* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
@@ -1395,7 +1540,9 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct list_head *flow_list)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
struct mlx5_flow_spec *spec;
struct mlx5e_tc_flow *flow;
int err;
@@ -1403,12 +1550,14 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
list_for_each_entry(flow, flow_list, tmp_list) {
if (!mlx5e_is_offloaded_flow(flow))
continue;
- spec = &flow->esw_attr->parse_attr->spec;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
/* update from encap rule to slow path rule */
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
/* mark the flow's encap dest as non-valid */
- flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -1417,7 +1566,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
continue;
}
- mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
flow->rule[0] = rule;
/* was unset when fast path rule removed */
flow_flag_set(flow, OFFLOADED);
@@ -1430,10 +1579,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
- if (mlx5e_is_eswitch_flow(flow))
- return flow->esw_attr->counter;
- else
- return flow->nic_attr->counter;
+ return flow->attr->counter;
}
/* Takes reference to all flows attached to encap and adds the flows to
@@ -1799,11 +1945,11 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
struct flow_match_enc_opts enc_opts_match;
struct tunnel_match_enc_opts tun_enc_opts;
struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5_flow_attr *attr = flow->attr;
struct mlx5e_rep_priv *uplink_rpriv;
struct tunnel_match_key tunnel_key;
bool enc_opts_is_dont_care = true;
@@ -1867,7 +2013,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
} else {
mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
err = mlx5e_tc_match_to_reg_set(priv->mdev,
- mod_hdr_acts,
+ mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
TUNNEL_TO_REG, value);
if (err)
goto err_set;
@@ -1953,8 +2099,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
if (!mlx5e_is_eswitch_flow(flow))
return -EOPNOTSUPP;
- needs_mapping = !!flow->esw_attr->chain;
- sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f);
+ needs_mapping = !!flow->attr->chain;
+ sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
*match_inner = !needs_mapping;
if ((needs_mapping || sets_mapping) &&
@@ -1966,7 +2112,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- if (!flow->esw_attr->chain) {
+ if (!flow->attr->chain) {
err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
match_level);
if (err) {
@@ -1981,7 +2127,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
* object
*/
if (!netif_is_bareudp(filter_dev))
- flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
}
if (!needs_mapping && !sets_mapping)
@@ -2484,12 +2630,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
}
}
- if (is_eswitch_flow) {
- flow->esw_attr->inner_match_level = inner_match_level;
- flow->esw_attr->outer_match_level = outer_match_level;
- } else {
- flow->nic_attr->match_level = non_tunnel_match_level;
- }
+ flow->attr->inner_match_level = inner_match_level;
+ flow->attr->outer_match_level = outer_match_level;
+
return err;
}
@@ -2615,6 +2758,7 @@ static struct mlx5_fields fields[] = {
OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
+ OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
@@ -2625,6 +2769,22 @@ static struct mlx5_fields fields[] = {
OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
};
+static unsigned long mask_to_le(unsigned long mask, int size)
+{
+ __be32 mask_be32;
+ __be16 mask_be16;
+
+ if (size == 32) {
+ mask_be32 = (__force __be32)(mask);
+ mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
+ } else if (size == 16) {
+ mask_be32 = (__force __be32)(mask);
+ mask_be16 = *(__be16 *)&mask_be32;
+ mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+ }
+
+ return mask;
+}
static int offload_pedit_fields(struct mlx5e_priv *priv,
int namespace,
struct pedit_headers_action *hdrs,
@@ -2638,9 +2798,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
struct mlx5e_tc_mod_hdr_acts *mod_acts;
struct mlx5_fields *f;
- unsigned long mask;
- __be32 mask_be32;
- __be16 mask_be16;
+ unsigned long mask, field_mask;
int err;
u8 cmd;
@@ -2706,14 +2864,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
if (skip)
continue;
- if (f->field_bsize == 32) {
- mask_be32 = (__force __be32)(mask);
- mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
- } else if (f->field_bsize == 16) {
- mask_be32 = (__force __be32)(mask);
- mask_be16 = *(__be16 *)&mask_be32;
- mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
- }
+ mask = mask_to_le(mask, f->field_bsize);
first = find_first_bit(&mask, f->field_bsize);
next_z = find_next_zero_bit(&mask, f->field_bsize, first);
@@ -2744,9 +2895,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
if (cmd == MLX5_ACTION_TYPE_SET) {
int start;
+ field_mask = mask_to_le(f->field_mask, f->field_bsize);
+
/* if field is bit sized it can start not from first bit */
- start = find_first_bit((unsigned long *)&f->field_mask,
- f->field_bsize);
+ start = find_first_bit(&field_mask, f->field_bsize);
MLX5_SET(set_action_in, action, offset, first - start);
/* length is num of bits to be written, zero means length of 32 */
@@ -3083,7 +3235,7 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv,
* we can't restore ct state
*/
if (!ct_clear && modify_tuple &&
- mlx5_tc_ct_add_no_trk_match(priv, spec)) {
+ mlx5_tc_ct_add_no_trk_match(spec)) {
NL_SET_ERR_MSG_MOD(extack,
"can't offload tuple modify header with ct matches");
netdev_info(priv->netdev,
@@ -3114,12 +3266,13 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
bool ct_flow = false, ct_clear = false;
u32 actions;
+ ct_clear = flow->attr->ct_attr.ct_action &
+ TCA_CT_ACT_CLEAR;
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
+ actions = flow->attr->action;
+
if (mlx5e_is_eswitch_flow(flow)) {
- actions = flow->esw_attr->action;
- ct_clear = flow->esw_attr->ct_attr.ct_action &
- TCA_CT_ACT_CLEAR;
- ct_flow = flow_flag_test(flow, CT) && !ct_clear;
- if (flow->esw_attr->split_count && ct_flow) {
+ if (flow->attr->esw_attr->split_count && ct_flow) {
/* All registers used by ct are cleared when using
* split rules.
*/
@@ -3127,8 +3280,6 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
"Can't offload mirroring with action ct");
return false;
}
- } else {
- actions = flow->nic_attr->action;
}
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
@@ -3226,15 +3377,67 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
extack);
}
+static int validate_goto_chain(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ const struct flow_action_entry *act,
+ u32 actions,
+ struct netlink_ext_ack *extack)
+{
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
+ struct mlx5_flow_attr *attr = flow->attr;
+ bool ft_flow = mlx5e_is_ft_flow(flow);
+ u32 dest_chain = act->chain_index;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_eswitch *esw;
+ u32 reformat_and_fwd;
+ u32 max_chain;
+
+ esw = priv->mdev->priv.eswitch;
+ chains = is_esw ? esw_chains(esw) : nic_chains(priv);
+ max_chain = mlx5_chains_get_chain_range(chains);
+ reformat_and_fwd = is_esw ?
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
+
+ if (ft_flow) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_chains_backwards_supported(chains) &&
+ dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Goto lower numbered chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+
+ if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ !reformat_and_fwd) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Goto chain is not allowed if action has reformat or decap");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int parse_tc_nic_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
- struct mlx5_nic_flow_attr *attr = flow->nic_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
struct pedit_headers_action hdrs[2] = {};
const struct flow_action_entry *act;
+ struct mlx5_nic_flow_attr *nic_attr;
u32 action = 0;
int err, i;
@@ -3245,7 +3448,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
FLOW_ACTION_HW_STATS_DELAYED_BIT))
return -EOPNOTSUPP;
- attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ nic_attr = attr->nic_attr;
+
+ nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
@@ -3266,8 +3471,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
if (err)
return err;
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
break;
case FLOW_ACTION_VLAN_MANGLE:
err = add_vlan_rewrite_action(priv,
@@ -3312,10 +3516,26 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
return -EINVAL;
}
- attr->flow_tag = mark;
+ nic_attr->flow_tag = mark;
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
break;
+ case FLOW_ACTION_GOTO:
+ err = validate_goto_chain(priv, flow, act, action,
+ extack);
+ if (err)
+ return err;
+
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = act->chain_index;
+ break;
+ case FLOW_ACTION_CT:
+ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
+ if (err)
+ return err;
+
+ flow_flag_set(flow, CT);
+ break;
default:
NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
return -EOPNOTSUPP;
@@ -3338,6 +3558,18 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
}
attr->action = action;
+
+ if (attr->dest_chain) {
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
+ return -EOPNOTSUPP;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
return -EOPNOTSUPP;
@@ -3469,8 +3701,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
bool *encap_valid)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
struct encap_key key;
struct mlx5e_encap_entry *e;
@@ -3556,8 +3788,8 @@ attach_flow:
flow->encaps[out_index].index = out_index;
*encap_dev = e->out_dev;
if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
- attr->dests[out_index].pkt_reformat = e->pkt_reformat;
- attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
*encap_valid = true;
} else {
*encap_valid = false;
@@ -3584,14 +3816,14 @@ static int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_decap_entry *d;
struct mlx5e_decap_key key;
uintptr_t hash_key;
int err = 0;
- parse_attr = attr->parse_attr;
+ parse_attr = flow->attr->parse_attr;
if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
NL_SET_ERR_MSG_MOD(extack,
"encap header larger than max supported");
@@ -3733,7 +3965,7 @@ static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
}
static int add_vlan_push_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct net_device **out_dev,
u32 *action)
{
@@ -3746,7 +3978,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv,
};
int err;
- err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
if (err)
return err;
@@ -3759,7 +3991,7 @@ static int add_vlan_push_action(struct mlx5e_priv *priv,
}
static int add_vlan_pop_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
u32 *action)
{
struct flow_action_entry vlan_act = {
@@ -3770,7 +4002,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv,
nest_level = attr->parse_attr->filter_dev->lower_level -
priv->netdev->lower_level;
while (nest_level--) {
- err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
if (err)
return err;
}
@@ -3831,59 +4063,20 @@ static bool is_duplicated_output_device(struct net_device *dev,
return false;
}
-static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw,
- struct mlx5e_tc_flow *flow,
- const struct flow_action_entry *act,
- u32 actions,
- struct netlink_ext_ack *extack)
-{
- u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- bool ft_flow = mlx5e_is_ft_flow(flow);
- u32 dest_chain = act->chain_index;
-
- if (ft_flow) {
- NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
- return -EOPNOTSUPP;
- }
-
- if (!mlx5_esw_chains_backwards_supported(esw) &&
- dest_chain <= attr->chain) {
- NL_SET_ERR_MSG_MOD(extack,
- "Goto lower numbered chain isn't supported");
- return -EOPNOTSUPP;
- }
- if (dest_chain > max_chain) {
- NL_SET_ERR_MSG_MOD(extack,
- "Requested destination chain is out of supported range");
- return -EOPNOTSUPP;
- }
-
- if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
- MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
- !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Goto chain is not allowed if action has reformat or decap");
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
static int verify_uplink_forwarding(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct net_device *out_dev,
struct netlink_ext_ack *extack)
{
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_rep_priv *rep_priv;
/* Forwarding non encapsulated traffic between
* uplink ports is allowed only if
* termination_table_raw_traffic cap is set.
*
- * Input vport was stored esw_attr->in_rep.
+ * Input vport was stored attr->in_rep.
* In LAG case, *priv* is the private data of
* uplink which may be not the input vport.
*/
@@ -3918,13 +4111,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
{
struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
const struct ip_tunnel_info *info = NULL;
+ struct mlx5_flow_attr *attr = flow->attr;
int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
bool ft_flow = mlx5e_is_ft_flow(flow);
const struct flow_action_entry *act;
+ struct mlx5_esw_flow_attr *esw_attr;
bool encap = false, decap = false;
u32 action = attr->action;
int err, i, if_count = 0;
@@ -3937,12 +4131,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
FLOW_ACTION_HW_STATS_DELAYED_BIT))
return -EOPNOTSUPP;
+ esw_attr = attr->esw_attr;
+ parse_attr = attr->parse_attr;
+
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_DROP:
action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
break;
+ case FLOW_ACTION_TRAP:
+ if (!flow_offload_has_one_action(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "action trap is supported as a sole action only");
+ return -EOPNOTSUPP;
+ }
+ action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT);
+ attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
+ break;
case FLOW_ACTION_MPLS_PUSH:
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
reformat_l2_to_l3_tunnel) ||
@@ -3983,7 +4190,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- attr->split_count = attr->out_count;
+ esw_attr->split_count = esw_attr->out_count;
}
break;
case FLOW_ACTION_CSUM:
@@ -4020,27 +4227,27 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
NL_SET_ERR_MSG_MOD(extack,
"can't support more output ports, can't offload forwarding");
netdev_warn(priv->netdev,
"can't support more than %d output ports, can't offload forwarding\n",
- attr->out_count);
+ esw_attr->out_count);
return -EOPNOTSUPP;
}
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
if (encap) {
- parse_attr->mirred_ifindex[attr->out_count] =
+ parse_attr->mirred_ifindex[esw_attr->out_count] =
out_dev->ifindex;
- parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
- if (!parse_attr->tun_info[attr->out_count])
+ parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
+ if (!parse_attr->tun_info[esw_attr->out_count])
return -ENOMEM;
encap = false;
- attr->dests[attr->out_count].flags |=
+ esw_attr->dests[esw_attr->out_count].flags |=
MLX5_ESW_DEST_ENCAP;
- attr->out_count++;
+ esw_attr->out_count++;
/* attr->dests[].rep is resolved when we
* handle encap
*/
@@ -4089,9 +4296,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
- attr->dests[attr->out_count].rep = rpriv->rep;
- attr->dests[attr->out_count].mdev = out_priv->mdev;
- attr->out_count++;
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
+ esw_attr->out_count++;
} else if (parse_attr->filter_dev != priv->netdev) {
/* All mlx5 devices are called to configure
* high level device filters. Therefore, the
@@ -4129,12 +4336,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
act, parse_attr, hdrs,
&action, extack);
} else {
- err = parse_tc_vlan_action(priv, act, attr, &action);
+ err = parse_tc_vlan_action(priv, act, esw_attr, &action);
}
if (err)
return err;
- attr->split_count = attr->out_count;
+ esw_attr->split_count = esw_attr->out_count;
break;
case FLOW_ACTION_VLAN_MANGLE:
err = add_vlan_rewrite_action(priv,
@@ -4144,14 +4351,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (err)
return err;
- attr->split_count = attr->out_count;
+ esw_attr->split_count = esw_attr->out_count;
break;
case FLOW_ACTION_TUNNEL_DECAP:
decap = true;
break;
case FLOW_ACTION_GOTO:
- err = mlx5_validate_goto_chain(esw, flow, act, action,
- extack);
+ err = validate_goto_chain(priv, flow, act, action,
+ extack);
if (err)
return err;
@@ -4159,7 +4366,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
attr->dest_chain = act->chain_index;
break;
case FLOW_ACTION_CT:
- err = mlx5_tc_ct_parse_action(priv, attr, act, extack);
+ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
if (err)
return err;
@@ -4198,7 +4405,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
(action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
- attr->split_count = 0;
+ esw_attr->split_count = 0;
}
}
@@ -4238,7 +4445,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
NL_SET_ERR_MSG_MOD(extack,
"current firmware doesn't support split rule for port mirroring");
netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
@@ -4289,25 +4496,37 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
flow_flag_test(flow, INGRESS);
bool act_is_encap = !!(attr->action &
MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
- bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
+ bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
MLX5_DEVCOM_ESW_OFFLOADS);
if (!esw_paired)
return false;
- if ((mlx5_lag_is_sriov(attr->in_mdev) ||
- mlx5_lag_is_multipath(attr->in_mdev)) &&
+ if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
+ mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
(is_rep_ingress || act_is_encap))
return true;
return false;
}
+struct mlx5_flow_attr *
+mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
+{
+ u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
+ sizeof(struct mlx5_esw_flow_attr) :
+ sizeof(struct mlx5_nic_flow_attr);
+ struct mlx5_flow_attr *attr;
+
+ return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
+}
+
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct flow_cls_offload *f, unsigned long flow_flags,
@@ -4315,19 +4534,26 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct mlx5e_tc_flow **__flow)
{
struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr;
struct mlx5e_tc_flow *flow;
- int out_index, err;
+ int err = -ENOMEM;
+ int out_index;
- flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
- if (!parse_attr || !flow) {
- err = -ENOMEM;
+ if (!parse_attr || !flow)
goto err_free;
- }
- flow->cookie = f->cookie;
flow->flags = flow_flags;
+ flow->cookie = f->cookie;
flow->priv = priv;
+
+ attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
+ if (!attr)
+ goto err_free;
+
+ flow->attr = attr;
+
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
INIT_LIST_HEAD(&flow->encaps[out_index].list);
INIT_LIST_HEAD(&flow->hairpin);
@@ -4347,7 +4573,17 @@ err_free:
}
static void
-mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
+mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct flow_cls_offload *f)
+{
+ attr->parse_attr = parse_attr;
+ attr->chain = f->common.chain_index;
+ attr->prio = f->common.prio;
+}
+
+static void
+mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct flow_cls_offload *f,
@@ -4355,10 +4591,9 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
struct mlx5_core_dev *in_mdev)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
- esw_attr->parse_attr = parse_attr;
- esw_attr->chain = f->common.chain_index;
- esw_attr->prio = f->common.prio;
+ mlx5e_flow_attr_init(attr, parse_attr, f);
esw_attr->in_rep = in_rep;
esw_attr->in_mdev = in_mdev;
@@ -4392,7 +4627,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
goto out;
parse_attr->filter_dev = filter_dev;
- mlx5e_flow_esw_attr_init(flow->esw_attr,
+ mlx5e_flow_esw_attr_init(flow->attr,
priv, parse_attr,
f, in_rep, in_mdev);
@@ -4402,8 +4637,8 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
goto err_free;
/* actions validation depends on parsing the ct matches first */
- err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f,
- &flow->esw_attr->ct_attr, extack);
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
+ &flow->attr->ct_attr, extack);
if (err)
goto err_free;
@@ -4434,6 +4669,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
{
struct mlx5e_priv *priv = flow->priv, *peer_priv;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_rep_priv *peer_urpriv;
@@ -4453,15 +4689,15 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
* original flow and packets redirected from uplink use the
* peer mdev.
*/
- if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
in_mdev = peer_priv->mdev;
else
in_mdev = priv->mdev;
- parse_attr = flow->esw_attr->parse_attr;
+ parse_attr = flow->attr->parse_attr;
peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
parse_attr->filter_dev,
- flow->esw_attr->in_rep, in_mdev);
+ attr->in_rep, in_mdev);
if (IS_ERR(peer_flow)) {
err = PTR_ERR(peer_flow);
goto out;
@@ -4525,9 +4761,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow;
int attr_size, err;
- /* multi-chain not supported for NIC rules */
- if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+ return -EOPNOTSUPP;
+ } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
return -EOPNOTSUPP;
+ }
flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
attr_size = sizeof(struct mlx5_nic_flow_attr);
@@ -4537,11 +4776,18 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
goto out;
parse_attr->filter_dev = filter_dev;
+ mlx5e_flow_attr_init(flow->attr, parse_attr, f);
+
err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
f, filter_dev);
if (err)
goto err_free;
+ err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
+ &flow->attr->ct_attr, extack);
+ if (err)
+ goto err_free;
+
err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
if (err)
goto err_free;
@@ -4551,14 +4797,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
goto err_free;
flow_flag_set(flow, OFFLOADED);
- kvfree(parse_attr);
*__flow = flow;
return 0;
err_free:
mlx5e_flow_put(priv, flow);
- kvfree(parse_attr);
out:
return err;
}
@@ -4933,9 +5177,27 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
+{
+ int tc_grp_size, tc_tbl_size;
+ u32 max_flow_counter;
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
+
+ tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
+ BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
+
+ return tc_tbl_size;
+}
+
int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
struct mlx5e_tc_table *tc = &priv->fs.tc;
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_chains_attr attr = {};
int err;
mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
@@ -4947,6 +5209,27 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
if (err)
return err;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+ attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+ attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK;
+ }
+ attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
+ attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
+ attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
+ attr.default_ft = priv->fs.vlan.ft.t;
+
+ tc->chains = mlx5_chains_create(dev, &attr);
+ if (IS_ERR(tc->chains)) {
+ err = PTR_ERR(tc->chains);
+ goto err_chains;
+ }
+
+ tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ if (IS_ERR(tc->ct))
+ goto err_ct;
+
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
err = register_netdevice_notifier_dev_net(priv->netdev,
&tc->netdevice_nb,
@@ -4954,8 +5237,17 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
if (err) {
tc->netdevice_nb.notifier_call = NULL;
mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
+ goto err_reg;
}
+ return 0;
+
+err_reg:
+ mlx5_tc_ct_clean(tc->ct);
+err_ct:
+ mlx5_chains_destroy(tc->chains);
+err_chains:
+ rhashtable_destroy(&tc->ht);
return err;
}
@@ -4980,28 +5272,38 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
mutex_destroy(&tc->hairpin_tbl_lock);
- rhashtable_destroy(&tc->ht);
+ rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
if (!IS_ERR_OR_NULL(tc->t)) {
- mlx5_destroy_flow_table(tc->t);
+ mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
tc->t = NULL;
}
mutex_destroy(&tc->t_lock);
+
+ mlx5_tc_ct_clean(tc->ct);
+ mlx5_chains_destroy(tc->chains);
}
int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
{
const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
struct mlx5_rep_uplink_priv *uplink_priv;
- struct mlx5e_rep_priv *priv;
+ struct mlx5e_rep_priv *rpriv;
struct mapping_ctx *mapping;
- int err;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ int err = 0;
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
- priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+ rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+ priv = netdev_priv(rpriv->netdev);
+ esw = priv->mdev->priv.eswitch;
- err = mlx5_tc_ct_init(uplink_priv);
- if (err)
+ uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
+ esw_chains(esw),
+ &esw->offloads.mod_hdr,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(uplink_priv->ct_priv))
goto err_ct;
mapping = mapping_create(sizeof(struct tunnel_match_key),
@@ -5030,7 +5332,7 @@ err_ht_init:
err_enc_opts_mapping:
mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
- mlx5_tc_ct_clean(uplink_priv);
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
err_ct:
netdev_warn(priv->netdev,
"Failed to initialize tc (eswitch), err: %d", err);
@@ -5044,10 +5346,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
+
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
mapping_destroy(uplink_priv->tunnel_mapping);
- mlx5_tc_ct_clean(uplink_priv);
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
}
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
@@ -5112,3 +5415,44 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
return -EOPNOTSUPP;
}
}
+
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain = 0, chain_tag, reg_b, zone_restore_id;
+ struct mlx5e_priv *priv = netdev_priv(skb->dev);
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
+ struct tc_skb_ext *tc_skb_ext;
+ int err;
+
+ reg_b = be32_to_cpu(cqe->ft_metadata);
+
+ chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
+
+ err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find chain for chain tag: %d, err: %d\n",
+ chain_tag, err);
+ return false;
+ }
+
+ if (chain) {
+ tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+ if (WARN_ON(!tc_skb_ext))
+ return false;
+
+ tc_skb_ext->chain = chain;
+
+ zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
+ ZONE_RESTORE_MAX;
+
+ if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
+ zone_restore_id))
+ return false;
+ }
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 437f680728fd..3b979008143d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -35,17 +35,57 @@
#include <net/pkt_cls.h>
#include "en.h"
+#include "eswitch.h"
+#include "en/tc_ct.h"
#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
#ifdef CONFIG_MLX5_ESWITCH
+#define NIC_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\
+ sizeof(struct mlx5_nic_flow_attr))
+#define ESW_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\
+ sizeof(struct mlx5_esw_flow_attr))
+#define ns_to_attr_sz(ns) (((ns) == MLX5_FLOW_NAMESPACE_FDB) ?\
+ ESW_FLOW_ATTR_SZ :\
+ NIC_FLOW_ATTR_SZ)
+
+
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
struct mlx5e_tc_update_priv {
struct net_device *tun_dev;
};
+struct mlx5_nic_flow_attr {
+ u32 flow_tag;
+ u32 hairpin_tirn;
+ struct mlx5_flow_table *hairpin_ft;
+};
+
+struct mlx5_flow_attr {
+ u32 action;
+ struct mlx5_fc *counter;
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_ct_attr ct_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ u32 chain;
+ u16 prio;
+ u32 dest_chain;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_table *dest_ft;
+ u8 inner_match_level;
+ u8 outer_match_level;
+ u32 flags;
+ union {
+ struct mlx5_esw_flow_attr esw_attr[0];
+ struct mlx5_nic_flow_attr nic_attr[0];
+ };
+};
+
+#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16
+#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0)
+
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
struct tunnel_match_key {
@@ -90,6 +130,7 @@ enum {
int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow);
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct flow_cls_offload *f, unsigned long flags);
@@ -133,6 +174,8 @@ enum mlx5e_tc_attr_to_reg {
MARK_TO_REG,
LABELS_TO_REG,
FTEID_TO_REG,
+ NIC_CHAIN_TO_REG,
+ NIC_ZONE_RESTORE_TO_REG,
};
struct mlx5e_tc_attr_to_reg_mapping {
@@ -150,6 +193,7 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
enum mlx5e_tc_attr_to_reg type,
u32 data);
@@ -181,14 +225,42 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
void *cb_priv);
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5_flow_handle *
+mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void
+mlx5_tc_rule_delete(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
#else /* CONFIG_MLX5_CLS_ACT */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
static inline int
mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{ return -EOPNOTSUPP; }
+
#endif /* CONFIG_MLX5_CLS_ACT */
+struct mlx5_flow_attr *mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type);
+
+struct mlx5_flow_handle *
+mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
@@ -203,4 +275,29 @@ mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{ return -EOPNOTSUPP; }
#endif
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain, reg_b;
+
+ reg_b = be32_to_cpu(cqe->ft_metadata);
+
+ chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
+ if (chain)
+ return true;
+#endif
+
+ return false;
+}
+
+bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
+{ return false; }
+static inline bool
+mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
+{ return true; }
+#endif
+
#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index da596de3abba..82b4419af9d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -144,9 +144,29 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
}
+/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */
+static void
+ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ if (skb->encapsulation) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+ sq->stats->csum_partial_inner++;
+ } else {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+ }
+}
+
static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
+ if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
+ ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
+ return;
+ }
+
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
if (skb->encapsulation) {
@@ -232,131 +252,188 @@ dma_unmap_wqe_err:
return -ENOMEM;
}
+struct mlx5e_tx_attr {
+ u32 num_bytes;
+ u16 headlen;
+ u16 ihs;
+ __be16 mss;
+ u16 insz;
+ u8 opcode;
+};
+
+struct mlx5e_tx_wqe_attr {
+ u16 ds_cnt;
+ u16 ds_cnt_inl;
+ u16 ds_cnt_ids;
+ u8 num_wqebbs;
+};
+
+static u8
+mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel)
+{
+ u8 mode;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ if (accel && accel->tls.tls_tisn)
+ return MLX5_INLINE_MODE_TCP_UDP;
+#endif
+
+ mode = sq->min_inline_mode;
+
+ if (skb_vlan_tag_present(skb) &&
+ test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
+ mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
+
+ return mode;
+}
+
+static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel,
+ struct mlx5e_tx_attr *attr)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+
+ if (skb_is_gso(skb)) {
+ u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_LSO,
+ .mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
+ .ihs = ihs,
+ .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
+ .headlen = skb_headlen(skb) - ihs,
+ };
+
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
+ u16 ihs = mlx5e_calc_min_inline(mode, skb);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_SEND,
+ .mss = cpu_to_be16(0),
+ .ihs = ihs,
+ .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
+ .headlen = skb_headlen(skb) - ihs,
+ };
+
+ stats->packets++;
+ }
+
+ attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
+ stats->bytes += attr->num_bytes;
+}
+
+static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
+{
+ u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
+ u16 ds_cnt_inl = 0;
+ u16 ds_cnt_ids = 0;
+
+ if (attr->insz)
+ ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
+ MLX5_SEND_WQE_DS);
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ if (skb_vlan_tag_present(skb))
+ inl += VLAN_HLEN;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .ds_cnt_ids = ds_cnt_ids,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
+
+static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+}
+
+static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+}
+
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
+ const struct mlx5e_tx_attr *attr,
+ const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
bool xmit_more)
{
struct mlx5_wq_cyc *wq = &sq->wq;
bool send_doorbell;
- wi->num_bytes = num_bytes;
- wi->num_dma = num_dma;
- wi->num_wqebbs = num_wqebbs;
- wi->skb = skb;
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = skb,
+ .num_bytes = attr->num_bytes,
+ .num_dma = num_dma,
+ .num_wqebbs = wqe_attr->num_wqebbs,
+ .num_fifo_pkts = 0,
+ };
- cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ mlx5e_tx_skb_update_hwts_flags(skb);
sq->pc += wi->num_wqebbs;
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) {
- netif_tx_stop_queue(sq->txq);
- sq->stats->stopped++;
- }
- send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes,
- xmit_more);
+ mlx5e_tx_check_stop(sq);
+
+ send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
if (send_doorbell)
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}
-void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
+static void
+mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
+ struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_eth_seg *eseg;
struct mlx5_wqe_data_seg *dseg;
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 headlen, ihs, contig_wqebbs_room;
- u16 ds_cnt, ds_cnt_inl = 0;
- u8 num_wqebbs, opcode;
- u32 num_bytes;
int num_dma;
- __be16 mss;
-
- /* Calc ihs and ds cnt, no writes to wqe yet */
- ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
- if (skb_is_gso(skb)) {
- opcode = MLX5_OPCODE_LSO;
- mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
- ihs = mlx5e_tx_get_gso_ihs(sq, skb);
- num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
- stats->packets += skb_shinfo(skb)->gso_segs;
- } else {
- u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb);
- opcode = MLX5_OPCODE_SEND;
- mss = 0;
- ihs = mlx5e_calc_min_inline(mode, skb);
- num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
- stats->packets++;
- }
-
- stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
- headlen = skb->len - ihs - skb->data_len;
- ds_cnt += !!headlen;
- ds_cnt += skb_shinfo(skb)->nr_frags;
-
- if (ihs) {
- ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN;
-
- ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
- ds_cnt += ds_cnt_inl;
- }
-
- num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room < num_wqebbs)) {
-#ifdef CONFIG_MLX5_EN_IPSEC
- struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
-#endif
-#ifdef CONFIG_MLX5_EN_TLS
- struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
-#endif
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- wqe = MLX5E_TX_FETCH_WQE(sq, pi);
-#ifdef CONFIG_MLX5_EN_IPSEC
- wqe->eth = cur_eth;
-#endif
-#ifdef CONFIG_MLX5_EN_TLS
- wqe->ctrl = cur_ctrl;
-#endif
- }
-
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
eseg = &wqe->eth;
dseg = wqe->data;
-#if IS_ENABLED(CONFIG_GENEVE)
- if (skb->encapsulation)
- mlx5e_tx_tunnel_accel(skb, eseg);
-#endif
- mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+ eseg->mss = attr->mss;
- eseg->mss = mss;
-
- if (ihs) {
- eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ if (attr->ihs) {
if (skb_vlan_tag_present(skb)) {
- ihs -= VLAN_HLEN;
- mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs);
+ eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN);
+ mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
stats->added_vlan_packets++;
} else {
- memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs);
+ memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
}
- dseg += ds_cnt_inl;
+ dseg += wqe_attr->ds_cnt_inl;
} else if (skb_vlan_tag_present(skb)) {
eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
@@ -365,12 +442,13 @@ void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->added_vlan_packets++;
}
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ dseg += wqe_attr->ds_cnt_ids;
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
+ attr->headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
- mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
- num_dma, wi, cseg, xmit_more);
+ mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);
return;
@@ -379,10 +457,173 @@ err_drop:
dev_kfree_skb_any(skb);
}
+static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
+{
+ return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
+ !attr->insz;
+}
+
+static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+
+ /* Assumes the session is already running and has at least one packet. */
+ return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+}
+
+static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ prefetchw(wqe->data);
+
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = 0,
+ };
+
+ memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+
+ sq->stats->mpwqe_blks++;
+}
+
+static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
+{
+ return sq->mpwqe.wqe;
+}
+
+static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg;
+
+ dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+
+ session->pkt_count++;
+ session->bytes_count += txd->len;
+
+ dseg->addr = cpu_to_be64(txd->dma_addr);
+ dseg->byte_count = cpu_to_be32(txd->len);
+ dseg->lkey = sq->mkey_be;
+ session->ds_count++;
+
+ sq->stats->mpwqe_pkts++;
+}
+
+static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ u8 ds_count = session->ds_count;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_tx_wqe_info *wi;
+ u16 pi;
+
+ cseg = &session->wqe->ctrl;
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = NULL,
+ .num_bytes = session->bytes_count,
+ .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
+ .num_dma = session->pkt_count,
+ .num_fifo_pkts = session->pkt_count,
+ };
+
+ sq->pc += wi->num_wqebbs;
+
+ session->wqe = NULL;
+
+ mlx5e_tx_check_stop(sq);
+
+ return cseg;
+}
+
+static void
+mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_xmit_data txd;
+
+ if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
+ mlx5e_tx_mpwqe_session_complete(sq);
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ }
+
+ sq->stats->xmit_more += xmit_more;
+
+ txd.data = skb->data;
+ txd.len = skb->len;
+
+ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+ goto err_unmap;
+ mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
+
+ mlx5e_skb_fifo_push(sq, skb);
+
+ mlx5e_tx_mpwqe_add_dseg(sq, &txd);
+
+ mlx5e_tx_skb_update_hwts_flags(skb);
+
+ if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
+ /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
+ /* Might stop the queue, but we were asked to ring the doorbell anyway. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ }
+
+ return;
+
+err_unmap:
+ mlx5e_dma_unmap_wqe_err(sq, 1);
+ sq->stats->dropped++;
+ dev_kfree_skb_any(skb);
+}
+
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
+{
+ /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
+ if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
+ mlx5e_tx_mpwqe_session_complete(sq);
+}
+
+static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+{
+ if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg)))
+ return false;
+
+ mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+
+ return true;
+}
+
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_accel_tx_state accel = {};
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
struct mlx5e_tx_wqe *wqe;
struct mlx5e_txqsq *sq;
u16 pi;
@@ -391,21 +632,92 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
/* May send SKBs and WQEs. */
if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
- goto out;
+ return NETDEV_TX_OK;
- pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);
+
+ if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
+ if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
+ struct mlx5_wqe_eth_seg eseg = {};
+
+ if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &eseg)))
+ return NETDEV_TX_OK;
+
+ mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
+ return NETDEV_TX_OK;
+ }
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+ }
+
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
/* May update the WQE, but may not post other WQEs. */
- if (unlikely(!mlx5e_accel_tx_finish(priv, sq, skb, wqe, &accel)))
- goto out;
+ mlx5e_accel_tx_finish(sq, wqe, &accel,
+ (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
+ if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth)))
+ return NETDEV_TX_OK;
- mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
-out:
return NETDEV_TX_OK;
}
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
+{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ mlx5e_txwqe_build_eseg_csum(sq, skb, &wqe->eth);
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
+}
+
+static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ int i;
+
+ for (i = 0; i < wi->num_dma; i++) {
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+}
+
+static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ struct skb_shared_hwtstamps hwts = {};
+ u64 ts = get_cqe_ts(cqe);
+
+ hwts.hwtstamp = mlx5_timecounter_cyc2time(sq->clock, ts);
+ skb_tstamp_tx(skb, &hwts);
+ }
+
+ napi_consume_skb(skb, napi_budget);
+}
+
+static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);
+
+ mlx5e_consume_skb(sq, skb, cqe, napi_budget);
+ }
+}
+
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
@@ -451,42 +763,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
- struct sk_buff *skb;
- int j;
-
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
- if (unlikely(!skb)) {
- mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
- sqcc += wi->num_wqebbs;
- continue;
- }
+ sqcc += wi->num_wqebbs;
- if (unlikely(skb_shinfo(skb)->tx_flags &
- SKBTX_HW_TSTAMP)) {
- struct skb_shared_hwtstamps hwts = {};
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
- hwts.hwtstamp =
- mlx5_timecounter_cyc2time(sq->clock,
- get_cqe_ts(cqe));
- skb_tstamp_tx(skb, &hwts);
+ npkts++;
+ nbytes += wi->num_bytes;
+ continue;
}
- for (j = 0; j < wi->num_dma; j++) {
- struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, dma_fifo_cc++);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
+ &dma_fifo_cc)))
+ continue;
- mlx5e_tx_dma_unmap(sq->pdev, dma);
- }
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
- npkts++;
- nbytes += wi->num_bytes;
- sqcc += wi->num_wqebbs;
- napi_consume_skb(skb, napi_budget);
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
} while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
@@ -525,13 +828,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
+static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++)
+ dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq));
+}
+
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
struct mlx5e_tx_wqe_info *wi;
u32 dma_fifo_cc, nbytes = 0;
u16 ci, sqcc, npkts = 0;
- struct sk_buff *skb;
- int i;
sqcc = sq->cc;
dma_fifo_cc = sq->dma_fifo_cc;
@@ -539,25 +848,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
while (sqcc != sq->pc) {
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
- if (!skb) {
- mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
- sqcc += wi->num_wqebbs;
+ sqcc += wi->num_wqebbs;
+
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ dev_kfree_skb_any(wi->skb);
+
+ npkts++;
+ nbytes += wi->num_bytes;
continue;
}
- for (i = 0; i < wi->num_dma; i++) {
- struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, dma_fifo_cc++);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
+ continue;
- mlx5e_tx_dma_unmap(sq->pdev, dma);
- }
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
- dev_kfree_skb_any(skb);
- npkts++;
- nbytes += wi->num_bytes;
- sqcc += wi->num_wqebbs;
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
}
sq->dma_fifo_cc = dma_fifo_cc;
@@ -576,9 +888,34 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}
+static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
+{
+ u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
+ u16 ds_cnt_inl = 0;
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
+
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
+
void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
struct mlx5i_tx_wqe *wqe;
struct mlx5_wqe_datagram_seg *datagram;
@@ -588,47 +925,17 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 ds_cnt, ds_cnt_inl = 0;
- u8 num_wqebbs, opcode;
- u16 headlen, ihs, pi;
- u32 num_bytes;
int num_dma;
- __be16 mss;
+ u16 pi;
- /* Calc ihs and ds cnt, no writes to wqe yet */
- ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
- if (skb_is_gso(skb)) {
- opcode = MLX5_OPCODE_LSO;
- mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
- ihs = mlx5e_tx_get_gso_ihs(sq, skb);
- num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
- stats->packets += skb_shinfo(skb)->gso_segs;
- } else {
- u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb);
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
- opcode = MLX5_OPCODE_SEND;
- mss = 0;
- ihs = mlx5e_calc_min_inline(mode, skb);
- num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
- stats->packets++;
- }
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
- stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
- headlen = skb->len - ihs - skb->data_len;
- ds_cnt += !!headlen;
- ds_cnt += skb_shinfo(skb)->nr_frags;
-
- if (ihs) {
- ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
- ds_cnt += ds_cnt_inl;
- }
-
- num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
- wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
-
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
@@ -640,20 +947,20 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
- eseg->mss = mss;
+ eseg->mss = attr.mss;
- if (ihs) {
- memcpy(eseg->inline_hdr.start, skb->data, ihs);
- eseg->inline_hdr.sz = cpu_to_be16(ihs);
- dseg += ds_cnt_inl;
+ if (attr.ihs) {
+ memcpy(eseg->inline_hdr.start, skb->data, attr.ihs);
+ eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
+ dseg += wqe_attr.ds_cnt_inl;
}
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs,
+ attr.headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
- mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
- num_dma, wi, cseg, xmit_more);
+ mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index de10b06bade5..d5868670f8a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -121,13 +121,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_xdpsq *xsksq = &c->xsksq;
struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
- bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
bool aff_change = false;
bool busy_xsk = false;
bool busy = false;
int work_done = 0;
+ bool xsk_open;
int i;
+ rcu_read_lock();
+
+ xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
ch_stats->poll++;
for (i = 0; i < c->num_tc; i++)
@@ -167,8 +171,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
busy |= busy_xsk;
if (busy) {
- if (likely(mlx5e_channel_no_affinity_change(c)))
- return budget;
+ if (likely(mlx5e_channel_no_affinity_change(c))) {
+ work_done = budget;
+ goto out;
+ }
ch_stats->aff_change++;
aff_change = true;
if (budget && work_done == budget)
@@ -176,7 +182,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
}
if (unlikely(!napi_complete_done(napi, work_done)))
- return work_done;
+ goto out;
ch_stats->arm++;
@@ -203,6 +209,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
ch_stats->force_irq++;
}
+out:
+ rcu_read_unlock();
+
return work_done;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 31ef9f8420c8..8ebfe782f95e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -189,6 +189,29 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
return count_eqe;
}
+static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags)
+ __acquires(&eq->lock)
+{
+ if (in_irq())
+ spin_lock(&eq->lock);
+ else
+ spin_lock_irqsave(&eq->lock, *flags);
+}
+
+static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags)
+ __releases(&eq->lock)
+{
+ if (in_irq())
+ spin_unlock(&eq->lock);
+ else
+ spin_unlock_irqrestore(&eq->lock, *flags);
+}
+
+enum async_eq_nb_action {
+ ASYNC_EQ_IRQ_HANDLER = 0,
+ ASYNC_EQ_RECOVER = 1,
+};
+
static int mlx5_eq_async_int(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -198,11 +221,14 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
struct mlx5_eq_table *eqt;
struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
+ unsigned long flags;
int num_eqes = 0;
dev = eq->dev;
eqt = dev->priv.eq_table;
+ mlx5_eq_async_int_lock(eq_async, &flags);
+
eqe = next_eqe_sw(eq);
if (!eqe)
goto out;
@@ -223,8 +249,19 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
out:
eq_update_ci(eq, 1);
+ mlx5_eq_async_int_unlock(eq_async, &flags);
- return 0;
+ return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0;
+}
+
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
+ int eqes;
+
+ eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
+ if (eqes)
+ mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
}
static void init_eq_buf(struct mlx5_eq *eq)
@@ -569,6 +606,7 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
int err;
eq->irq_nb.notifier_call = mlx5_eq_async_int;
+ spin_lock_init(&eq->lock);
err = create_async_eq(dev, &eq->core, param);
if (err) {
@@ -656,8 +694,10 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->pages_eq, "pages");
cleanup_async_eq(dev, &table->async_eq, "async");
+ mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
mlx5_cmd_use_polling(dev);
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}
@@ -788,8 +828,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
INIT_LIST_HEAD(&eq->tasklet_ctx.list);
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
+ tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);
eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 07b2acd7e6b3..c3faae67e4d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -148,6 +148,11 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
esw_acl_egress_vlan_grp_destroy(vport);
}
+static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_eswitch_is_vf_vport(esw, vport_num);
+}
+
int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
int table_size = 0;
@@ -157,6 +162,9 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
!MLX5_CAP_GEN(esw->dev, prio_tag_required))
return 0;
+ if (!esw_acl_egress_needed(esw, vport->vport))
+ return 0;
+
esw_acl_egress_ofld_rules_destroy(vport);
if (mlx5_esw_acl_egress_fwd2vport_supported(esw))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c
deleted file mode 100644
index d5bf908dfecd..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c
+++ /dev/null
@@ -1,944 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-// Copyright (c) 2020 Mellanox Technologies.
-
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/mlx5_ifc.h>
-#include <linux/mlx5/fs.h>
-
-#include "esw/chains.h"
-#include "en/mapping.h"
-#include "mlx5_core.h"
-#include "fs_core.h"
-#include "eswitch.h"
-#include "en.h"
-#include "en_tc.h"
-
-#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv)
-#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock)
-#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht)
-#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping)
-#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht)
-#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left)
-#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb)
-#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb)
-#define fdb_ignore_flow_level_supported(esw) \
- (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level))
-#define fdb_modify_header_fwd_to_table_supported(esw) \
- (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
-
-/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS),
- * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated
- * for each flow table pool. We can allocate up to 16M of each pool,
- * and we keep track of how much we used via get_next_avail_sz_from_pool.
- * Firmware doesn't report any of this for now.
- * ESW_POOL is expected to be sorted from large to small and match firmware
- * pools.
- */
-#define ESW_SIZE (16 * 1024 * 1024)
-static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024,
- 1 * 1024 * 1024,
- 64 * 1024,
- 128 };
-#define ESW_FT_TBL_SZ (64 * 1024)
-
-struct mlx5_esw_chains_priv {
- struct rhashtable chains_ht;
- struct rhashtable prios_ht;
- /* Protects above chains_ht and prios_ht */
- struct mutex lock;
-
- struct mlx5_flow_table *tc_end_fdb;
- struct mapping_ctx *chains_mapping;
-
- int fdb_left[ARRAY_SIZE(ESW_POOLS)];
-};
-
-struct fdb_chain {
- struct rhash_head node;
-
- u32 chain;
-
- int ref;
- int id;
-
- struct mlx5_eswitch *esw;
- struct list_head prios_list;
- struct mlx5_flow_handle *restore_rule;
- struct mlx5_modify_hdr *miss_modify_hdr;
-};
-
-struct fdb_prio_key {
- u32 chain;
- u32 prio;
- u32 level;
-};
-
-struct fdb_prio {
- struct rhash_head node;
- struct list_head list;
-
- struct fdb_prio_key key;
-
- int ref;
-
- struct fdb_chain *fdb_chain;
- struct mlx5_flow_table *fdb;
- struct mlx5_flow_table *next_fdb;
- struct mlx5_flow_group *miss_group;
- struct mlx5_flow_handle *miss_rule;
-};
-
-static const struct rhashtable_params chain_params = {
- .head_offset = offsetof(struct fdb_chain, node),
- .key_offset = offsetof(struct fdb_chain, chain),
- .key_len = sizeof_field(struct fdb_chain, chain),
- .automatic_shrinking = true,
-};
-
-static const struct rhashtable_params prio_params = {
- .head_offset = offsetof(struct fdb_prio, node),
- .key_offset = offsetof(struct fdb_prio, key),
- .key_len = sizeof_field(struct fdb_prio, key),
- .automatic_shrinking = true,
-};
-
-bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw)
-{
- return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
-}
-
-bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw)
-{
- return mlx5_esw_chains_prios_supported(esw) &&
- fdb_ignore_flow_level_supported(esw);
-}
-
-u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_chains_prios_supported(esw))
- return 1;
-
- if (fdb_ignore_flow_level_supported(esw))
- return UINT_MAX - 1;
-
- return FDB_TC_MAX_CHAIN;
-}
-
-u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw)
-{
- return mlx5_esw_chains_get_chain_range(esw) + 1;
-}
-
-u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_chains_prios_supported(esw))
- return 1;
-
- if (fdb_ignore_flow_level_supported(esw))
- return UINT_MAX;
-
- return FDB_TC_MAX_PRIO;
-}
-
-static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
-{
- if (fdb_ignore_flow_level_supported(esw))
- return UINT_MAX;
-
- return FDB_TC_LEVELS_PER_PRIO;
-}
-
-#define POOL_NEXT_SIZE 0
-static int
-mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw,
- int desired_size)
-{
- int i, found_i = -1;
-
- for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) {
- if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) {
- found_i = i;
- if (desired_size != POOL_NEXT_SIZE)
- break;
- }
- }
-
- if (found_i != -1) {
- --fdb_pool_left(esw)[found_i];
- return ESW_POOLS[found_i];
- }
-
- return 0;
-}
-
-static void
-mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
-{
- int i;
-
- for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) {
- if (sz == ESW_POOLS[i]) {
- ++fdb_pool_left(esw)[i];
- return;
- }
- }
-
- WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz);
-}
-
-static void
-mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw)
-{
- u32 fdb_max;
- int i;
-
- fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size);
-
- for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--)
- fdb_pool_left(esw)[i] =
- ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
-}
-
-static struct mlx5_flow_table *
-mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw,
- u32 chain, u32 prio, u32 level)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_flow_namespace *ns;
- struct mlx5_flow_table *fdb;
- int sz;
-
- if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
- ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
- MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
-
- sz = (chain == mlx5_esw_chains_get_ft_chain(esw)) ?
- mlx5_esw_chains_get_avail_sz_from_pool(esw, ESW_FT_TBL_SZ) :
- mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE);
- if (!sz)
- return ERR_PTR(-ENOSPC);
- ft_attr.max_fte = sz;
-
- /* We use tc_slow_fdb(esw) as the table's next_ft till
- * ignore_flow_level is allowed on FT creation and not just for FTEs.
- * Instead caller should add an explicit miss rule if needed.
- */
- ft_attr.next_ft = tc_slow_fdb(esw);
-
- /* The root table(chain 0, prio 1, level 0) is required to be
- * connected to the previous prio (FDB_BYPASS_PATH if exists).
- * We always create it, as a managed table, in order to align with
- * fs_core logic.
- */
- if (!fdb_ignore_flow_level_supported(esw) ||
- (chain == 0 && prio == 1 && level == 0)) {
- ft_attr.level = level;
- ft_attr.prio = prio - 1;
- ns = mlx5_get_fdb_sub_ns(esw->dev, chain);
- } else {
- ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
- ft_attr.prio = FDB_TC_OFFLOAD;
- /* Firmware doesn't allow us to create another level 0 table,
- * so we create all unmanaged tables as level 1.
- *
- * To connect them, we use explicit miss rules with
- * ignore_flow_level. Caller is responsible to create
- * these rules (if needed).
- */
- ft_attr.level = 1;
- ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
- }
-
- ft_attr.autogroup.num_reserved_entries = 2;
- ft_attr.autogroup.max_num_groups = esw->params.large_group_num;
- fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
- if (IS_ERR(fdb)) {
- esw_warn(esw->dev,
- "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
- (int)PTR_ERR(fdb), chain, prio, level, sz);
- mlx5_esw_chains_put_sz_to_pool(esw, sz);
- return fdb;
- }
-
- return fdb;
-}
-
-static void
-mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw,
- struct mlx5_flow_table *fdb)
-{
- mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte);
- mlx5_destroy_flow_table(fdb);
-}
-
-static int
-create_fdb_chain_restore(struct fdb_chain *fdb_chain)
-{
- char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
- struct mlx5_eswitch *esw = fdb_chain->esw;
- struct mlx5_modify_hdr *mod_hdr;
- u32 index;
- int err;
-
- if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) ||
- !mlx5_esw_chains_prios_supported(esw))
- return 0;
-
- err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index);
- if (err)
- return err;
- if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
- /* we got the special default flow tag id, so we won't know
- * if we actually marked the packet with the restore rule
- * we create.
- *
- * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
- */
- err = mapping_add(esw_chains_mapping(esw),
- &fdb_chain->chain, &index);
- mapping_remove(esw_chains_mapping(esw),
- MLX5_FS_DEFAULT_FLOW_TAG);
- if (err)
- return err;
- }
-
- fdb_chain->id = index;
-
- MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
- MLX5_SET(set_action_in, modact, field,
- mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield);
- MLX5_SET(set_action_in, modact, offset,
- mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8);
- MLX5_SET(set_action_in, modact, length,
- mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8);
- MLX5_SET(set_action_in, modact, data, fdb_chain->id);
- mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
- 1, modact);
- if (IS_ERR(mod_hdr)) {
- err = PTR_ERR(mod_hdr);
- goto err_mod_hdr;
- }
- fdb_chain->miss_modify_hdr = mod_hdr;
-
- fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id);
- if (IS_ERR(fdb_chain->restore_rule)) {
- err = PTR_ERR(fdb_chain->restore_rule);
- goto err_rule;
- }
-
- return 0;
-
-err_rule:
- mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr);
-err_mod_hdr:
- /* Datapath can't find this mapping, so we can safely remove it */
- mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
- return err;
-}
-
-static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain)
-{
- struct mlx5_eswitch *esw = fdb_chain->esw;
-
- if (!fdb_chain->miss_modify_hdr)
- return;
-
- mlx5_del_flow_rules(fdb_chain->restore_rule);
- mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr);
- mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
-}
-
-static struct fdb_chain *
-mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
-{
- struct fdb_chain *fdb_chain = NULL;
- int err;
-
- fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL);
- if (!fdb_chain)
- return ERR_PTR(-ENOMEM);
-
- fdb_chain->esw = esw;
- fdb_chain->chain = chain;
- INIT_LIST_HEAD(&fdb_chain->prios_list);
-
- err = create_fdb_chain_restore(fdb_chain);
- if (err)
- goto err_restore;
-
- err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node,
- chain_params);
- if (err)
- goto err_insert;
-
- return fdb_chain;
-
-err_insert:
- destroy_fdb_chain_restore(fdb_chain);
-err_restore:
- kvfree(fdb_chain);
- return ERR_PTR(err);
-}
-
-static void
-mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain)
-{
- struct mlx5_eswitch *esw = fdb_chain->esw;
-
- rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node,
- chain_params);
-
- destroy_fdb_chain_restore(fdb_chain);
- kvfree(fdb_chain);
-}
-
-static struct fdb_chain *
-mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
-{
- struct fdb_chain *fdb_chain;
-
- fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain,
- chain_params);
- if (!fdb_chain) {
- fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain);
- if (IS_ERR(fdb_chain))
- return fdb_chain;
- }
-
- fdb_chain->ref++;
-
- return fdb_chain;
-}
-
-static struct mlx5_flow_handle *
-mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain,
- struct mlx5_flow_table *fdb,
- struct mlx5_flow_table *next_fdb)
-{
- struct mlx5_eswitch *esw = fdb_chain->esw;
- struct mlx5_flow_destination dest = {};
- struct mlx5_flow_act act = {};
-
- act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
- act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = next_fdb;
-
- if (next_fdb == tc_end_fdb(esw) &&
- mlx5_esw_chains_prios_supported(esw)) {
- act.modify_hdr = fdb_chain->miss_modify_hdr;
- act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- }
-
- return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1);
-}
-
-static int
-mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio,
- struct mlx5_flow_table *next_fdb)
-{
- struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
- struct fdb_chain *fdb_chain = fdb_prio->fdb_chain;
- struct fdb_prio *pos;
- int n = 0, err;
-
- if (fdb_prio->key.level)
- return 0;
-
- /* Iterate in reverse order until reaching the level 0 rule of
- * the previous priority, adding all the miss rules first, so we can
- * revert them if any of them fails.
- */
- pos = fdb_prio;
- list_for_each_entry_continue_reverse(pos,
- &fdb_chain->prios_list,
- list) {
- miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain,
- pos->fdb,
- next_fdb);
- if (IS_ERR(miss_rules[n])) {
- err = PTR_ERR(miss_rules[n]);
- goto err_prev_rule;
- }
-
- n++;
- if (!pos->key.level)
- break;
- }
-
- /* Success, delete old miss rules, and update the pointers. */
- n = 0;
- pos = fdb_prio;
- list_for_each_entry_continue_reverse(pos,
- &fdb_chain->prios_list,
- list) {
- mlx5_del_flow_rules(pos->miss_rule);
-
- pos->miss_rule = miss_rules[n];
- pos->next_fdb = next_fdb;
-
- n++;
- if (!pos->key.level)
- break;
- }
-
- return 0;
-
-err_prev_rule:
- while (--n >= 0)
- mlx5_del_flow_rules(miss_rules[n]);
-
- return err;
-}
-
-static void
-mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain)
-{
- if (--fdb_chain->ref == 0)
- mlx5_esw_chains_destroy_fdb_chain(fdb_chain);
-}
-
-static struct fdb_prio *
-mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
- u32 chain, u32 prio, u32 level)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_handle *miss_rule = NULL;
- struct mlx5_flow_group *miss_group;
- struct fdb_prio *fdb_prio = NULL;
- struct mlx5_flow_table *next_fdb;
- struct fdb_chain *fdb_chain;
- struct mlx5_flow_table *fdb;
- struct list_head *pos;
- u32 *flow_group_in;
- int err;
-
- fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain);
- if (IS_ERR(fdb_chain))
- return ERR_CAST(fdb_chain);
-
- fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL);
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!fdb_prio || !flow_group_in) {
- err = -ENOMEM;
- goto err_alloc;
- }
-
- /* Chain's prio list is sorted by prio and level.
- * And all levels of some prio point to the next prio's level 0.
- * Example list (prio, level):
- * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
- * In hardware, we will we have the following pointers:
- * (3,0) -> (5,0) -> (7,0) -> Slow path
- * (3,1) -> (5,0)
- * (5,1) -> (7,0)
- * (6,1) -> (7,0)
- */
-
- /* Default miss for each chain: */
- next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ?
- tc_slow_fdb(esw) :
- tc_end_fdb(esw);
- list_for_each(pos, &fdb_chain->prios_list) {
- struct fdb_prio *p = list_entry(pos, struct fdb_prio, list);
-
- /* exit on first pos that is larger */
- if (prio < p->key.prio || (prio == p->key.prio &&
- level < p->key.level)) {
- /* Get next level 0 table */
- next_fdb = p->key.level == 0 ? p->fdb : p->next_fdb;
- break;
- }
- }
-
- fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- goto err_create;
- }
-
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
- fdb->max_fte - 2);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- fdb->max_fte - 1);
- miss_group = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(miss_group)) {
- err = PTR_ERR(miss_group);
- goto err_group;
- }
-
- /* Add miss rule to next_fdb */
- miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb);
- if (IS_ERR(miss_rule)) {
- err = PTR_ERR(miss_rule);
- goto err_miss_rule;
- }
-
- fdb_prio->miss_group = miss_group;
- fdb_prio->miss_rule = miss_rule;
- fdb_prio->next_fdb = next_fdb;
- fdb_prio->fdb_chain = fdb_chain;
- fdb_prio->key.chain = chain;
- fdb_prio->key.prio = prio;
- fdb_prio->key.level = level;
- fdb_prio->fdb = fdb;
-
- err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node,
- prio_params);
- if (err)
- goto err_insert;
-
- list_add(&fdb_prio->list, pos->prev);
-
- /* Table is ready, connect it */
- err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb);
- if (err)
- goto err_update;
-
- kvfree(flow_group_in);
- return fdb_prio;
-
-err_update:
- list_del(&fdb_prio->list);
- rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
- prio_params);
-err_insert:
- mlx5_del_flow_rules(miss_rule);
-err_miss_rule:
- mlx5_destroy_flow_group(miss_group);
-err_group:
- mlx5_esw_chains_destroy_fdb_table(esw, fdb);
-err_create:
-err_alloc:
- kvfree(fdb_prio);
- kvfree(flow_group_in);
- mlx5_esw_chains_put_fdb_chain(fdb_chain);
- return ERR_PTR(err);
-}
-
-static void
-mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw,
- struct fdb_prio *fdb_prio)
-{
- struct fdb_chain *fdb_chain = fdb_prio->fdb_chain;
-
- WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio,
- fdb_prio->next_fdb));
-
- list_del(&fdb_prio->list);
- rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
- prio_params);
- mlx5_del_flow_rules(fdb_prio->miss_rule);
- mlx5_destroy_flow_group(fdb_prio->miss_group);
- mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb);
- mlx5_esw_chains_put_fdb_chain(fdb_chain);
- kvfree(fdb_prio);
-}
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level)
-{
- struct mlx5_flow_table *prev_fts;
- struct fdb_prio *fdb_prio;
- struct fdb_prio_key key;
- int l = 0;
-
- if ((chain > mlx5_esw_chains_get_chain_range(esw) &&
- chain != mlx5_esw_chains_get_ft_chain(esw)) ||
- prio > mlx5_esw_chains_get_prio_range(esw) ||
- level > mlx5_esw_chains_get_level_range(esw))
- return ERR_PTR(-EOPNOTSUPP);
-
- /* create earlier levels for correct fs_core lookup when
- * connecting tables.
- */
- for (l = 0; l < level; l++) {
- prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l);
- if (IS_ERR(prev_fts)) {
- fdb_prio = ERR_CAST(prev_fts);
- goto err_get_prevs;
- }
- }
-
- key.chain = chain;
- key.prio = prio;
- key.level = level;
-
- mutex_lock(&esw_chains_lock(esw));
- fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key,
- prio_params);
- if (!fdb_prio) {
- fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain,
- prio, level);
- if (IS_ERR(fdb_prio))
- goto err_create_prio;
- }
-
- ++fdb_prio->ref;
- mutex_unlock(&esw_chains_lock(esw));
-
- return fdb_prio->fdb;
-
-err_create_prio:
- mutex_unlock(&esw_chains_lock(esw));
-err_get_prevs:
- while (--l >= 0)
- mlx5_esw_chains_put_table(esw, chain, prio, l);
- return ERR_CAST(fdb_prio);
-}
-
-void
-mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level)
-{
- struct fdb_prio *fdb_prio;
- struct fdb_prio_key key;
-
- key.chain = chain;
- key.prio = prio;
- key.level = level;
-
- mutex_lock(&esw_chains_lock(esw));
- fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key,
- prio_params);
- if (!fdb_prio)
- goto err_get_prio;
-
- if (--fdb_prio->ref == 0)
- mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio);
- mutex_unlock(&esw_chains_lock(esw));
-
- while (level-- > 0)
- mlx5_esw_chains_put_table(esw, chain, prio, level);
-
- return;
-
-err_get_prio:
- mutex_unlock(&esw_chains_lock(esw));
- WARN_ONCE(1,
- "Couldn't find table: (chain: %d prio: %d level: %d)",
- chain, prio, level);
-}
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw)
-{
- return tc_end_fdb(esw);
-}
-
-struct mlx5_flow_table *
-mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw)
-{
- u32 chain, prio, level;
- int err;
-
- if (!fdb_ignore_flow_level_supported(esw)) {
- err = -EOPNOTSUPP;
-
- esw_warn(esw->dev,
- "Couldn't create global flow table, ignore_flow_level not supported.");
- goto err_ignore;
- }
-
- chain = mlx5_esw_chains_get_chain_range(esw),
- prio = mlx5_esw_chains_get_prio_range(esw);
- level = mlx5_esw_chains_get_level_range(esw);
-
- return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level);
-
-err_ignore:
- return ERR_PTR(err);
-}
-
-void
-mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw,
- struct mlx5_flow_table *ft)
-{
- mlx5_esw_chains_destroy_fdb_table(esw, ft);
-}
-
-static int
-mlx5_esw_chains_init(struct mlx5_eswitch *esw)
-{
- struct mlx5_esw_chains_priv *chains_priv;
- struct mlx5_core_dev *dev = esw->dev;
- u32 max_flow_counter, fdb_max;
- struct mapping_ctx *mapping;
- int err;
-
- chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
- if (!chains_priv)
- return -ENOMEM;
- esw_chains_priv(esw) = chains_priv;
-
- max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
- fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
-
- esw_debug(dev,
- "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n",
- max_flow_counter, esw->params.large_group_num, fdb_max);
-
- mlx5_esw_chains_init_sz_pool(esw);
-
- if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
- esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
- esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
- esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
- } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
- esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
- esw_warn(dev, "Tc chains and priorities offload aren't supported\n");
- } else if (!fdb_modify_header_fwd_to_table_supported(esw)) {
- /* Disabled when ttl workaround is needed, e.g
- * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig
- */
- esw_warn(dev,
- "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n");
- esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
- } else {
- esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
- esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
- mlx5_esw_chains_get_chain_range(esw),
- mlx5_esw_chains_get_prio_range(esw));
- }
-
- err = rhashtable_init(&esw_chains_ht(esw), &chain_params);
- if (err)
- goto init_chains_ht_err;
-
- err = rhashtable_init(&esw_prios_ht(esw), &prio_params);
- if (err)
- goto init_prios_ht_err;
-
- mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw),
- true);
- if (IS_ERR(mapping)) {
- err = PTR_ERR(mapping);
- goto mapping_err;
- }
- esw_chains_mapping(esw) = mapping;
-
- mutex_init(&esw_chains_lock(esw));
-
- return 0;
-
-mapping_err:
- rhashtable_destroy(&esw_prios_ht(esw));
-init_prios_ht_err:
- rhashtable_destroy(&esw_chains_ht(esw));
-init_chains_ht_err:
- kfree(chains_priv);
- return err;
-}
-
-static void
-mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw)
-{
- mutex_destroy(&esw_chains_lock(esw));
- mapping_destroy(esw_chains_mapping(esw));
- rhashtable_destroy(&esw_prios_ht(esw));
- rhashtable_destroy(&esw_chains_ht(esw));
-
- kfree(esw_chains_priv(esw));
-}
-
-static int
-mlx5_esw_chains_open(struct mlx5_eswitch *esw)
-{
- struct mlx5_flow_table *ft;
- int err;
-
- /* Create tc_end_fdb(esw) which is the always created ft chain */
- ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw),
- 1, 0);
- if (IS_ERR(ft))
- return PTR_ERR(ft);
-
- tc_end_fdb(esw) = ft;
-
- /* Always open the root for fast path */
- ft = mlx5_esw_chains_get_table(esw, 0, 1, 0);
- if (IS_ERR(ft)) {
- err = PTR_ERR(ft);
- goto level_0_err;
- }
-
- /* Open level 1 for split rules now if prios isn't supported */
- if (!mlx5_esw_chains_prios_supported(esw)) {
- err = mlx5_esw_vport_tbl_get(esw);
- if (err)
- goto level_1_err;
- }
-
- return 0;
-
-level_1_err:
- mlx5_esw_chains_put_table(esw, 0, 1, 0);
-level_0_err:
- mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
- return err;
-}
-
-static void
-mlx5_esw_chains_close(struct mlx5_eswitch *esw)
-{
- if (!mlx5_esw_chains_prios_supported(esw))
- mlx5_esw_vport_tbl_put(esw);
- mlx5_esw_chains_put_table(esw, 0, 1, 0);
- mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
-}
-
-int
-mlx5_esw_chains_create(struct mlx5_eswitch *esw)
-{
- int err;
-
- err = mlx5_esw_chains_init(esw);
- if (err)
- return err;
-
- err = mlx5_esw_chains_open(esw);
- if (err)
- goto err_open;
-
- return 0;
-
-err_open:
- mlx5_esw_chains_cleanup(esw);
- return err;
-}
-
-void
-mlx5_esw_chains_destroy(struct mlx5_eswitch *esw)
-{
- mlx5_esw_chains_close(esw);
- mlx5_esw_chains_cleanup(esw);
-}
-
-int
-mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain,
- u32 *chain_mapping)
-{
- return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping);
-}
-
-int
-mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping)
-{
- return mapping_remove(esw_chains_mapping(esw), chain_mapping);
-}
-
-int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag,
- u32 *chain)
-{
- int err;
-
- err = mapping_find(esw_chains_mapping(esw), tag, chain);
- if (err) {
- esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag);
- return -ENOENT;
- }
-
- return 0;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
deleted file mode 100644
index 7679ac359e31..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2020 Mellanox Technologies. */
-
-#ifndef __ML5_ESW_CHAINS_H__
-#define __ML5_ESW_CHAINS_H__
-
-#include "eswitch.h"
-
-#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
-
-bool
-mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw);
-bool
-mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw);
-u32
-mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw);
-u32
-mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw);
-u32
-mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw);
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level);
-void
-mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level);
-
-struct mlx5_flow_table *
-mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw);
-
-struct mlx5_flow_table *
-mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw);
-void
-mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw,
- struct mlx5_flow_table *ft);
-
-int
-mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain,
- u32 *chain_mapping);
-int
-mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw,
- u32 chain_mapping);
-
-int mlx5_esw_chains_create(struct mlx5_eswitch *esw);
-void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
-
-int
-mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain);
-
-#else /* CONFIG_MLX5_CLS_ACT */
-
-static inline struct mlx5_flow_table *
-mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level) { return ERR_PTR(-EOPNOTSUPP); }
-static inline void
-mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
- u32 level) {}
-
-static inline struct mlx5_flow_table *
-mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) { return ERR_PTR(-EOPNOTSUPP); }
-
-static inline int mlx5_esw_chains_create(struct mlx5_eswitch *esw) { return 0; }
-static inline void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) {}
-
-#endif /* CONFIG_MLX5_CLS_ACT */
-
-#endif /* __ML5_ESW_CHAINS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
new file mode 100644
index 000000000000..ffff11baa3d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Ltd. */
+
+#include <linux/mlx5/driver.h>
+#include "eswitch.h"
+
+static void
+mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+ u64 parent_id;
+
+ parent_id = mlx5_query_nic_system_image_guid(dev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
+
+static bool
+mlx5_esw_devlink_port_supported(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num == MLX5_VPORT_UPLINK ||
+ (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) ||
+ mlx5_eswitch_is_vf_vport(esw, vport_num);
+}
+
+static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct devlink_port_attrs attrs = {};
+ struct netdev_phys_item_id ppid = {};
+ struct devlink_port *dl_port;
+ u32 controller_num = 0;
+ bool external;
+ u16 pfnum;
+
+ dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL);
+ if (!dl_port)
+ return NULL;
+
+ mlx5_esw_get_port_parent_id(dev, &ppid);
+ pfnum = PCI_FUNC(dev->pdev->devfn);
+ external = mlx5_core_is_ecpf_esw_manager(dev);
+ if (external)
+ controller_num = dev->priv.eswitch->offloads.host_number + 1;
+
+ if (vport_num == MLX5_VPORT_UPLINK) {
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = pfnum;
+ memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+ attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_set(dl_port, &attrs);
+ } else if (vport_num == MLX5_VPORT_PF) {
+ memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external);
+ } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) {
+ memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
+ dl_port->attrs.switch_id.id_len = ppid.id_len;
+ devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+ vport_num - 1, external);
+ }
+ return dl_port;
+}
+
+static void mlx5_esw_dl_port_free(struct devlink_port *dl_port)
+{
+ kfree(dl_port);
+}
+
+int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct devlink_port *dl_port;
+ unsigned int dl_port_index;
+ struct mlx5_vport *vport;
+ struct devlink *devlink;
+ int err;
+
+ if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+ return 0;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ dl_port = mlx5_esw_dl_port_alloc(esw, vport_num);
+ if (!dl_port)
+ return -ENOMEM;
+
+ devlink = priv_to_devlink(dev);
+ dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
+ err = devlink_port_register(devlink, dl_port, dl_port_index);
+ if (err)
+ goto reg_err;
+
+ vport->dl_port = dl_port;
+ return 0;
+
+reg_err:
+ mlx5_esw_dl_port_free(dl_port);
+ return err;
+}
+
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+ return;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return;
+ devlink_port_unregister(vport->dl_port);
+ mlx5_esw_dl_port_free(vport->dl_port);
+ vport->dl_port = NULL;
+}
+
+struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ return vport->dl_port;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 867d8120b8a5..cf87de94418f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -42,6 +42,7 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "lib/mpfs.h"
+#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#ifdef CONFIG_MLX5_ESWITCH
@@ -62,6 +63,9 @@
#define mlx5_esw_has_fwd_fdb(dev) \
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
+#define esw_chains(esw) \
+ ((esw)->fdb_table.offloads.esw_chains_priv)
+
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_handle *allow_rule;
@@ -152,14 +156,9 @@ struct mlx5_vport {
bool enabled;
enum mlx5_eswitch_vport_event enabled_events;
+ struct devlink_port *dl_port;
};
-enum offloads_fdb_flags {
- ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
-};
-
-struct mlx5_esw_chains_priv;
-
struct mlx5_eswitch_fdb {
union {
struct legacy_fdb {
@@ -183,7 +182,7 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
- struct mlx5_esw_chains_priv *esw_chains_priv;
+ struct mlx5_fs_chains *esw_chains_priv;
struct {
DECLARE_HASHTABLE(table, 8);
/* Protects vports.table */
@@ -217,6 +216,7 @@ struct mlx5_esw_offload {
atomic64_t num_flows;
enum devlink_eswitch_encap_mode encap;
struct ida vport_metadata_ida;
+ unsigned int host_number; /* ECPF supports one external host */
};
/* E-Switch MC FDB table hash node */
@@ -329,7 +329,7 @@ struct mlx5_termtbl_handle;
bool
mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_spec *spec);
@@ -349,19 +349,19 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
struct mlx5_flow_handle *
mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
void
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
void
mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
struct mlx5_flow_handle *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
@@ -401,7 +401,6 @@ struct mlx5_esw_flow_attr {
int split_count;
int out_count;
- int action;
__be16 vlan_proto[MLX5_FS_VLAN_DEPTH];
u16 vlan_vid[MLX5_FS_VLAN_DEPTH];
u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
@@ -413,19 +412,7 @@ struct mlx5_esw_flow_attr {
struct mlx5_core_dev *mdev;
struct mlx5_termtbl_handle *termtbl;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5_modify_hdr *modify_hdr;
- u8 inner_match_level;
- u8 outer_match_level;
- struct mlx5_fc *counter;
- u32 chain;
- u16 prio;
- u32 dest_chain;
- u32 flags;
- struct mlx5_flow_table *fdb;
- struct mlx5_flow_table *dest_ft;
- struct mlx5_ct_attr ct_attr;
struct mlx5_pkt_reformat *decap_pkt_reformat;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
};
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
@@ -451,9 +438,9 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr);
+ struct mlx5_flow_attr *attr);
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
u16 vport, u16 vlan, u8 qos, u8 set_flags);
@@ -677,6 +664,9 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
enum mlx5_eswitch_vport_event enabled_events);
void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs);
+int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d2516922d867..c9c2962ad49f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -39,12 +39,13 @@
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
-#include "esw/chains.h"
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
#include "lib/devcom.h"
#include "lib/eq.h"
+#include "lib/fs_chains.h"
+#include "en_tc.h"
/* There are two match-all miss flows, one for unicast dst mac and
* one for multicast.
@@ -66,6 +67,12 @@ struct mlx5_vport_key {
u16 vhca_id;
} __packed;
+struct mlx5_vport_tbl_attr {
+ u16 chain;
+ u16 prio;
+ u16 vport;
+};
+
struct mlx5_vport_table {
struct hlist_node hlist;
struct mlx5_flow_table *fdb;
@@ -94,10 +101,10 @@ esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns)
}
static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_vport_tbl_attr *attr,
struct mlx5_vport_key *key)
{
- key->vport = attr->in_rep->vport;
+ key->vport = attr->vport;
key->chain = attr->chain;
key->prio = attr->prio;
key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
@@ -118,7 +125,7 @@ esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32
}
static void
-esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr)
+esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
{
struct mlx5_vport_table *e;
struct mlx5_vport_key key;
@@ -138,7 +145,7 @@ out:
}
static struct mlx5_flow_table *
-esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr)
+esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
{
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *ns;
@@ -189,16 +196,15 @@ err_alloc:
int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw)
{
- struct mlx5_esw_flow_attr attr = {};
- struct mlx5_eswitch_rep rep = {};
+ struct mlx5_vport_tbl_attr attr;
struct mlx5_flow_table *fdb;
struct mlx5_vport *vport;
int i;
+ attr.chain = 0;
attr.prio = 1;
- attr.in_rep = &rep;
mlx5_esw_for_all_vports(esw, i, vport) {
- attr.in_rep->vport = vport->vport;
+ attr.vport = vport->vport;
fdb = esw_vport_tbl_get(esw, &attr);
if (IS_ERR(fdb))
goto out;
@@ -212,15 +218,14 @@ out:
void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw)
{
- struct mlx5_esw_flow_attr attr = {};
- struct mlx5_eswitch_rep rep = {};
+ struct mlx5_vport_tbl_attr attr;
struct mlx5_vport *vport;
int i;
+ attr.chain = 0;
attr.prio = 1;
- attr.in_rep = &rep;
mlx5_esw_for_all_vports(esw, i, vport) {
- attr.in_rep->vport = vport->vport;
+ attr.vport = vport->vport;
esw_vport_tbl_put(esw, &attr);
}
}
@@ -242,8 +247,11 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *attr)
{
if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
- attr && attr->in_rep && attr->in_rep->vport == MLX5_VPORT_UPLINK)
- spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ attr && attr->in_rep)
+ spec->flow_context.flow_source =
+ attr->in_rep->vport == MLX5_VPORT_UPLINK ?
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK :
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
}
static void
@@ -290,11 +298,14 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
- bool split = !!(attr->split_count);
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ bool split = !!(esw_attr->split_count);
+ struct mlx5_vport_tbl_attr fwd_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
@@ -308,13 +319,13 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
- flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]);
- flow_act.vlan[0].vid = attr->vlan_vid[0];
- flow_act.vlan[0].prio = attr->vlan_prio[0];
+ flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]);
+ flow_act.vlan[0].vid = esw_attr->vlan_vid[0];
+ flow_act.vlan[0].prio = esw_attr->vlan_prio[0];
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
- flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]);
- flow_act.vlan[1].vid = attr->vlan_vid[1];
- flow_act.vlan[1].prio = attr->vlan_prio[1];
+ flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]);
+ flow_act.vlan[1].vid = esw_attr->vlan_vid[1];
+ flow_act.vlan[1].prio = esw_attr->vlan_prio[1];
}
}
@@ -329,12 +340,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
} else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw);
+ dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
i++;
} else if (attr->dest_chain) {
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
- ft = mlx5_esw_chains_get_table(esw, attr->dest_chain,
- 1, 0);
+ ft = mlx5_chains_get_table(chains, attr->dest_chain,
+ 1, 0);
if (IS_ERR(ft)) {
rule = ERR_CAST(ft);
goto err_create_goto_table;
@@ -344,28 +355,29 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
dest[i].ft = ft;
i++;
} else {
- for (j = attr->split_count; j < attr->out_count; j++) {
+ for (j = esw_attr->split_count; j < esw_attr->out_count; j++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->dests[j].rep->vport;
+ dest[i].vport.num = esw_attr->dests[j].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
+ MLX5_CAP_GEN(esw_attr->dests[j].mdev, vhca_id);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
dest[i].vport.flags |=
MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
+ if (esw_attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- flow_act.pkt_reformat = attr->dests[j].pkt_reformat;
+ flow_act.pkt_reformat =
+ esw_attr->dests[j].pkt_reformat;
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
dest[i].vport.pkt_reformat =
- attr->dests[j].pkt_reformat;
+ esw_attr->dests[j].pkt_reformat;
}
i++;
}
}
}
- if (attr->decap_pkt_reformat)
- flow_act.pkt_reformat = attr->decap_pkt_reformat;
+ if (esw_attr->decap_pkt_reformat)
+ flow_act.pkt_reformat = esw_attr->decap_pkt_reformat;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
@@ -382,26 +394,30 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
flow_act.modify_hdr = attr->modify_hdr;
if (split) {
- fdb = esw_vport_tbl_get(esw, attr);
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+
+ fdb = esw_vport_tbl_get(esw, &fwd_attr);
} else {
if (attr->chain || attr->prio)
- fdb = mlx5_esw_chains_get_table(esw, attr->chain,
- attr->prio, 0);
+ fdb = mlx5_chains_get_table(chains, attr->chain,
+ attr->prio, 0);
else
- fdb = attr->fdb;
+ fdb = attr->ft;
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT))
- mlx5_eswitch_set_rule_source_port(esw, spec, attr);
+ mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr);
}
if (IS_ERR(fdb)) {
rule = ERR_CAST(fdb);
goto err_esw_get;
}
- mlx5_eswitch_set_rule_flow_source(esw, spec, attr);
+ mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
- rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
+ rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
&flow_act, dest, i);
else
rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
@@ -414,12 +430,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
err_add_rule:
if (split)
- esw_vport_tbl_put(esw, attr);
+ esw_vport_tbl_put(esw, &fwd_attr);
else if (attr->chain || attr->prio)
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
err_esw_get:
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain)
- mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0);
+ mlx5_chains_put_table(chains, attr->dest_chain, 1, 0);
err_create_goto_table:
return rule;
}
@@ -427,46 +443,51 @@ err_create_goto_table:
struct mlx5_flow_handle *
mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ struct mlx5_vport_tbl_attr fwd_attr;
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
int i;
- fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0);
+ fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
if (IS_ERR(fast_fdb)) {
rule = ERR_CAST(fast_fdb);
goto err_get_fast;
}
- fwd_fdb = esw_vport_tbl_get(esw, attr);
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr);
if (IS_ERR(fwd_fdb)) {
rule = ERR_CAST(fwd_fdb);
goto err_get_fwd;
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- for (i = 0; i < attr->split_count; i++) {
+ for (i = 0; i < esw_attr->split_count; i++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->dests[i].rep->vport;
+ dest[i].vport.num = esw_attr->dests[i].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
+ MLX5_CAP_GEN(esw_attr->dests[i].mdev, vhca_id);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
+ if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
- dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat;
+ dest[i].vport.pkt_reformat = esw_attr->dests[i].pkt_reformat;
}
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[i].ft = fwd_fdb,
i++;
- mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- mlx5_eswitch_set_rule_flow_source(esw, spec, attr);
+ mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr);
if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -481,9 +502,9 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
return rule;
add_err:
- esw_vport_tbl_put(esw, attr);
+ esw_vport_tbl_put(esw, &fwd_attr);
err_get_fwd:
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
err_get_fast:
return rule;
}
@@ -491,10 +512,13 @@ err_get_fast:
static void
__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
bool fwd_rule)
{
- bool split = (attr->split_count > 0);
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_fs_chains *chains = esw_chains(esw);
+ bool split = (esw_attr->split_count > 0);
+ struct mlx5_vport_tbl_attr fwd_attr;
int i;
mlx5_del_flow_rules(rule);
@@ -502,31 +526,36 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) {
/* unref the term table */
for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
- if (attr->dests[i].termtbl)
- mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl);
+ if (esw_attr->dests[i].termtbl)
+ mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl);
}
}
atomic64_dec(&esw->offloads.num_flows);
+ if (fwd_rule || split) {
+ fwd_attr.chain = attr->chain;
+ fwd_attr.prio = attr->prio;
+ fwd_attr.vport = esw_attr->in_rep->vport;
+ }
+
if (fwd_rule) {
- esw_vport_tbl_put(esw, attr);
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
+ esw_vport_tbl_put(esw, &fwd_attr);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
} else {
if (split)
- esw_vport_tbl_put(esw, attr);
+ esw_vport_tbl_put(esw, &fwd_attr);
else if (attr->chain || attr->prio)
- mlx5_esw_chains_put_table(esw, attr->chain, attr->prio,
- 0);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
if (attr->dest_chain)
- mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0);
+ mlx5_chains_put_table(chains, attr->dest_chain, 1, 0);
}
}
void
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
__mlx5_eswitch_del_rule(esw, rule, attr, false);
}
@@ -534,7 +563,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
void
mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
__mlx5_eswitch_del_rule(esw, rule, attr, true);
}
@@ -611,9 +640,10 @@ out_notsupp:
}
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_eswitch_rep *vport = NULL;
bool push, pop, fwd;
int err = 0;
@@ -629,17 +659,17 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
mutex_lock(&esw->state_lock);
- err = esw_add_vlan_action_check(attr, push, pop, fwd);
+ err = esw_add_vlan_action_check(esw_attr, push, pop, fwd);
if (err)
goto unlock;
attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
- vport = esw_vlan_action_get_vport(attr, push, pop);
+ vport = esw_vlan_action_get_vport(esw_attr, push, pop);
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
+ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
vport->vlan_refcount++;
attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
}
@@ -662,11 +692,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (vport->vlan_refcount)
goto skip_set_push;
- err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0,
- SET_VLAN_INSERT | SET_VLAN_STRIP);
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0],
+ 0, SET_VLAN_INSERT | SET_VLAN_STRIP);
if (err)
goto out;
- vport->vlan = attr->vlan_vid[0];
+ vport->vlan = esw_attr->vlan_vid[0];
skip_set_push:
vport->vlan_refcount++;
}
@@ -679,9 +709,10 @@ unlock:
}
int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_eswitch_rep *vport = NULL;
bool push, pop, fwd;
int err = 0;
@@ -699,11 +730,11 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
mutex_lock(&esw->state_lock);
- vport = esw_vlan_action_get_vport(attr, push, pop);
+ vport = esw_vlan_action_get_vport(esw_attr, push, pop);
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
+ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
vport->vlan_refcount--;
goto out;
@@ -1137,6 +1168,126 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
}
}
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+#define fdb_modify_header_fwd_to_table_supported(esw) \
+ (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
+static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level))
+ *flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
+ } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Tc chains and priorities offload aren't supported\n");
+ } else if (!fdb_modify_header_fwd_to_table_supported(esw)) {
+ /* Disabled when ttl workaround is needed, e.g
+ * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig
+ */
+ esw_warn(dev,
+ "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n");
+ *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ } else {
+ *flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_info(dev, "Supported tc chains and prios offload\n");
+ }
+
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ *flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED;
+}
+
+static int
+esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_table *nf_ft, *ft;
+ struct mlx5_chains_attr attr = {};
+ struct mlx5_fs_chains *chains;
+ u32 fdb_max;
+ int err;
+
+ fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
+
+ esw_init_chains_offload_flags(esw, &attr.flags);
+ attr.ns = MLX5_FLOW_NAMESPACE_FDB;
+ attr.max_ft_sz = fdb_max;
+ attr.max_grp_num = esw->params.large_group_num;
+ attr.default_ft = miss_fdb;
+ attr.max_restore_tag = esw_get_max_restore_tag(esw);
+
+ chains = mlx5_chains_create(dev, &attr);
+ if (IS_ERR(chains)) {
+ err = PTR_ERR(chains);
+ esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
+ return err;
+ }
+
+ esw->fdb_table.offloads.esw_chains_priv = chains;
+
+ /* Create tc_end_ft which is the always created ft chain */
+ nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains),
+ 1, 0);
+ if (IS_ERR(nf_ft)) {
+ err = PTR_ERR(nf_ft);
+ goto nf_ft_err;
+ }
+
+ /* Always open the root for fast path */
+ ft = mlx5_chains_get_table(chains, 0, 1, 0);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto level_0_err;
+ }
+
+ /* Open level 1 for split fdb rules now if prios isn't supported */
+ if (!mlx5_chains_prios_supported(chains)) {
+ err = mlx5_esw_vport_tbl_get(esw);
+ if (err)
+ goto level_1_err;
+ }
+
+ mlx5_chains_set_end_ft(chains, nf_ft);
+
+ return 0;
+
+level_1_err:
+ mlx5_chains_put_table(chains, 0, 1, 0);
+level_0_err:
+ mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
+nf_ft_err:
+ mlx5_chains_destroy(chains);
+ esw->fdb_table.offloads.esw_chains_priv = NULL;
+
+ return err;
+}
+
+static void
+esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
+{
+ if (!mlx5_chains_prios_supported(chains))
+ mlx5_esw_vport_tbl_put(esw);
+ mlx5_chains_put_table(chains, 0, 1, 0);
+ mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
+ mlx5_chains_destroy(chains);
+}
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static int
+esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
+{ return 0; }
+
+static void
+esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
+{}
+
+#endif
+
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -1192,9 +1343,9 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
}
esw->fdb_table.offloads.slow_fdb = fdb;
- err = mlx5_esw_chains_create(esw);
+ err = esw_chains_create(esw, fdb);
if (err) {
- esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
+ esw_warn(dev, "Failed to open fdb chains err(%d)\n", err);
goto fdb_chains_err;
}
@@ -1219,35 +1370,37 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
}
esw->fdb_table.offloads.send_to_vport_grp = g;
- /* create peer esw miss group */
- memset(flow_group_in, 0, inlen);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ /* create peer esw miss group */
+ memset(flow_group_in, 0, inlen);
- esw_set_flow_group_source_port(esw, flow_group_in);
+ esw_set_flow_group_source_port(esw, flow_group_in);
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- match_criteria = MLX5_ADDR_OF(create_flow_group_in,
- flow_group_in,
- match_criteria);
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
- }
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- ix + esw->total_vports - 1);
- ix += esw->total_vports;
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ix + esw->total_vports - 1);
+ ix += esw->total_vports;
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
- goto peer_miss_err;
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto peer_miss_err;
+ }
+ esw->fdb_table.offloads.peer_miss_grp = g;
}
- esw->fdb_table.offloads.peer_miss_grp = g;
/* create miss group */
memset(flow_group_in, 0, inlen);
@@ -1281,11 +1434,12 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
miss_rule_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
miss_err:
- mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
peer_miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
- mlx5_esw_chains_destroy(esw);
+ esw_chains_destroy(esw, esw_chains(esw));
fdb_chains_err:
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
slow_fdb_err:
@@ -1305,10 +1459,12 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
- mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
- mlx5_esw_chains_destroy(esw);
+ esw_chains_destroy(esw, esw_chains(esw));
+
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
/* Holds true only as long as DMFS is the default */
mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
@@ -1667,15 +1823,12 @@ static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
__esw_offloads_unload_rep(esw, rep, rep_type);
}
-int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
+static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
{
struct mlx5_eswitch_rep *rep;
int rep_type;
int err;
- if (esw->mode != MLX5_ESWITCH_OFFLOADS)
- return 0;
-
rep = mlx5_eswitch_get_rep(esw, vport_num);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
@@ -1694,19 +1847,46 @@ err_reps:
return err;
}
-void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
+static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num)
{
struct mlx5_eswitch_rep *rep;
int rep_type;
- if (esw->mode != MLX5_ESWITCH_OFFLOADS)
- return;
-
rep = mlx5_eswitch_get_rep(esw, vport_num);
for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--)
__esw_offloads_unload_rep(esw, rep, rep_type);
}
+int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ int err;
+
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
+ if (err)
+ return err;
+
+ err = mlx5_esw_offloads_rep_load(esw, vport_num);
+ if (err)
+ goto load_err;
+ return err;
+
+load_err:
+ mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+ return err;
+}
+
+void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return;
+
+ mlx5_esw_offloads_rep_unload(esw, vport_num);
+ mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+}
+
#define ESW_OFFLOADS_DEVCOM_PAIR (0)
#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
@@ -1864,53 +2044,38 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
return true;
}
-static bool
-esw_check_vport_match_metadata_mandatory(const struct mlx5_eswitch *esw)
-{
- return mlx5_core_mp_enabled(esw->dev);
-}
-
-static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw)
-{
- return esw_check_vport_match_metadata_mandatory(esw) &&
- esw_check_vport_match_metadata_supported(esw);
-}
-
u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
{
- u32 num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1;
- u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0);
- u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
- u32 start;
- u32 end;
+ u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
+ u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 1;
+ u32 pf_num;
int id;
- /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */
- WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS));
-
- /* Trim vhca_id to ESW_VHCA_ID_BITS */
- vhca_id &= vhca_id_mask;
-
- start = (vhca_id << ESW_VPORT_BITS);
- end = start + num_vports;
- if (!vhca_id)
- start += 1; /* zero is reserved/invalid metadata */
- id = ida_alloc_range(&esw->offloads.vport_metadata_ida, start, end, GFP_KERNEL);
+ /* Only 4 bits of pf_num */
+ pf_num = PCI_FUNC(esw->dev->pdev->devfn);
+ if (pf_num > max_pf_num)
+ return 0;
- return (id < 0) ? 0 : id;
+ /* Metadata is 4 bits of PFNUM and 12 bits of unique id */
+ /* Use only non-zero vport_id (1-4095) for all PF's */
+ id = ida_alloc_range(&esw->offloads.vport_metadata_ida, 1, vport_end_ida, GFP_KERNEL);
+ if (id < 0)
+ return 0;
+ id = (pf_num << ESW_VPORT_BITS) | id;
+ return id;
}
void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
{
- ida_free(&esw->offloads.vport_metadata_ida, metadata);
+ u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1;
+
+ /* Metadata contains only 12 bits of actual ida id */
+ ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask);
}
static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- if (vport->vport == MLX5_VPORT_UPLINK)
- return 0;
-
vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
vport->metadata = vport->default_metadata;
return vport->metadata ? 0 : -ENOSPC;
@@ -1919,40 +2084,65 @@ static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- if (vport->vport == MLX5_VPORT_UPLINK || !vport->default_metadata)
+ if (!vport->default_metadata)
return;
WARN_ON(vport->metadata != vport->default_metadata);
mlx5_esw_match_metadata_free(esw, vport->default_metadata);
}
+static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ int i;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return;
+
+ mlx5_esw_for_all_vports_reverse(esw, i, vport)
+ esw_offloads_vport_metadata_cleanup(esw, vport);
+}
+
+static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ int err;
+ int i;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+ return 0;
+
+ mlx5_esw_for_all_vports(esw, i, vport) {
+ err = esw_offloads_vport_metadata_setup(esw, vport);
+ if (err)
+ goto metadata_err;
+ }
+
+ return 0;
+
+metadata_err:
+ esw_offloads_metadata_uninit(esw);
+ return err;
+}
+
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
int err;
- err = esw_offloads_vport_metadata_setup(esw, vport);
- if (err)
- goto metadata_err;
-
err = esw_acl_ingress_ofld_setup(esw, vport);
if (err)
- goto ingress_err;
+ return err;
- if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
- err = esw_acl_egress_ofld_setup(esw, vport);
- if (err)
- goto egress_err;
- }
+ err = esw_acl_egress_ofld_setup(esw, vport);
+ if (err)
+ goto egress_err;
return 0;
egress_err:
esw_acl_ingress_ofld_cleanup(esw, vport);
-ingress_err:
- esw_offloads_vport_metadata_cleanup(esw, vport);
-metadata_err:
return err;
}
@@ -1962,22 +2152,14 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
{
esw_acl_egress_ofld_cleanup(vport);
esw_acl_ingress_ofld_cleanup(esw, vport);
- esw_offloads_vport_metadata_cleanup(esw, vport);
}
static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int err;
-
- if (esw_use_vport_metadata(esw))
- esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
- err = esw_vport_create_offloads_acl_tables(esw, vport);
- if (err)
- esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
- return err;
+ return esw_vport_create_offloads_acl_tables(esw, vport);
}
static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
@@ -1986,7 +2168,6 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
esw_vport_destroy_offloads_acl_tables(esw, vport);
- esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
}
static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
@@ -2110,6 +2291,24 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type
return NOTIFY_OK;
}
+static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
+{
+ const u32 *query_host_out;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return 0;
+
+ query_host_out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(query_host_out))
+ return PTR_ERR(query_host_out);
+
+ /* Mark non local controller with non zero controller number. */
+ esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out,
+ host_params_context.host_number);
+ kvfree(query_host_out);
+ return 0;
+}
+
int esw_offloads_enable(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
@@ -2124,6 +2323,17 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
mutex_init(&esw->offloads.termtbl_mutex);
mlx5_rdma_enable_roce(esw->dev);
+ err = mlx5_esw_host_number_init(esw);
+ if (err)
+ goto err_metadata;
+
+ if (esw_check_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+ err = esw_offloads_metadata_init(esw);
+ if (err)
+ goto err_metadata;
+
err = esw_set_passing_vport_metadata(esw, true);
if (err)
goto err_vport_metadata;
@@ -2156,6 +2366,9 @@ err_uplink:
err_steering_init:
esw_set_passing_vport_metadata(esw, false);
err_vport_metadata:
+ esw_offloads_metadata_uninit(esw);
+err_metadata:
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
mlx5_rdma_disable_roce(esw->dev);
mutex_destroy(&esw->offloads.termtbl_mutex);
return err;
@@ -2189,6 +2402,8 @@ void esw_offloads_disable(struct mlx5_eswitch *esw)
esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
esw_set_passing_vport_metadata(esw, false);
esw_offloads_steering_cleanup(esw);
+ esw_offloads_metadata_uninit(esw);
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
mlx5_rdma_disable_roce(esw->dev);
mutex_destroy(&esw->offloads.termtbl_mutex);
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index 17a0d2bc102b..ec679560a95d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -3,6 +3,7 @@
#include <linux/mlx5/fs.h>
#include "eswitch.h"
+#include "en_tc.h"
#include "fs_core.h"
struct mlx5_termtbl_handle {
@@ -228,10 +229,11 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
bool
mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_spec *spec)
{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
int i;
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
@@ -244,8 +246,8 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
return true;
/* hairpin */
- for (i = attr->split_count; i < attr->out_count; i++)
- if (attr->dests[i].rep->vport == MLX5_VPORT_UPLINK)
+ for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+ if (esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK)
return true;
return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 831d2c39e153..80da50e12915 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -54,7 +54,7 @@ static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
if (unlikely(!buf->sg[0].data))
goto out;
- dma_device = &conn->fdev->mdev->pdev->dev;
+ dma_device = mlx5_core_dma_dev(conn->fdev->mdev);
buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data,
buf->sg[0].size, buf->dma_dir);
err = dma_mapping_error(dma_device, buf->sg[0].dma_addr);
@@ -86,7 +86,7 @@ static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
{
struct device *dma_device;
- dma_device = &conn->fdev->mdev->pdev->dev;
+ dma_device = mlx5_core_dma_dev(conn->fdev->mdev);
if (buf->sg[1].data)
dma_unmap_single(dma_device, buf->sg[1].dma_addr,
buf->sg[1].size, buf->dma_dir);
@@ -388,9 +388,9 @@ static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
mlx5_fpga_conn_arm_cq(conn);
}
-static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t)
{
- struct mlx5_fpga_conn *conn = (void *)data;
+ struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet);
if (unlikely(!conn->qp.active))
return;
@@ -478,8 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
conn->cq.mcq.irqn = irqn;
conn->cq.mcq.uar = fdev->conn_res.uar;
- tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
- (unsigned long)conn);
+ tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet);
mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index fee169732de7..babe3405132a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -776,6 +776,9 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
table_type = FS_FT_NIC_RX;
break;
case MLX5_FLOW_NAMESPACE_EGRESS:
+#ifdef CONFIG_MLX5_IPSEC
+ case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL:
+#endif
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 9ccec5f8b92a..16091838bfcf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -126,6 +126,10 @@
#define LAG_NUM_PRIOS 1
#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+#define KERNEL_TX_IPSEC_NUM_PRIOS 1
+#define KERNEL_TX_IPSEC_NUM_LEVELS 1
+#define KERNEL_TX_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS)
+
struct node_caps {
size_t arr_sz;
long *caps;
@@ -180,13 +184,24 @@ static struct init_tree_node {
static struct init_tree_node egress_root_fs = {
.type = FS_TYPE_NAMESPACE,
+#ifdef CONFIG_MLX5_IPSEC
+ .ar_size = 2,
+#else
.ar_size = 1,
+#endif
.children = (struct init_tree_node[]) {
ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
+#ifdef CONFIG_MLX5_IPSEC
+ ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
+ KERNEL_TX_IPSEC_NUM_LEVELS))),
+#endif
}
};
@@ -654,7 +669,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
fte->action = *flow_act;
fte->flow_context = spec->flow_context;
- tree_init_node(&fte->node, NULL, del_sw_fte);
+ tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
return fte;
}
@@ -1595,11 +1610,12 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
return true;
if (ignore_level) {
- if (ft->type != FS_FT_FDB)
+ if (ft->type != FS_FT_FDB &&
+ ft->type != FS_FT_NIC_RX)
return false;
if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
- dest->ft->type != FS_FT_FDB)
+ ft->type != dest->ft->type)
return false;
}
@@ -1792,7 +1808,6 @@ skip_search:
up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
- tree_put_node(&fte->node, false);
return rule;
}
rule = ERR_PTR(-ENOENT);
@@ -1891,7 +1906,6 @@ search_again_locked:
up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
- tree_put_node(&fte->node, false);
tree_put_node(&g->node, false);
return rule;
@@ -2001,7 +2015,9 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
up_write_ref_node(&fte->node, false);
} else {
del_hw_fte(&fte->node);
- up_write(&fte->node.lock);
+ /* Avoid double call to del_hw_fte */
+ fte->node.del_hw_func = NULL;
+ up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
}
kfree(handle);
@@ -2164,8 +2180,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
break;
}
- if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ if (type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ type == MLX5_FLOW_NAMESPACE_EGRESS_KERNEL) {
root_ns = steering->egress_root_ns;
+ prio = type - MLX5_FLOW_NAMESPACE_EGRESS;
} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
root_ns = steering->rdma_rx_root_ns;
prio = RDMA_RX_BYPASS_PRIO;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
new file mode 100644
index 000000000000..f9042e147c7f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "fw_reset.h"
+#include "diag/fw_tracer.h"
+
+enum {
+ MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
+ MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
+ MLX5_FW_RESET_FLAGS_PENDING_COMP
+};
+
+struct mlx5_fw_reset {
+ struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
+ struct workqueue_struct *wq;
+ struct work_struct fw_live_patch_work;
+ struct work_struct reset_request_work;
+ struct work_struct reset_reload_work;
+ struct work_struct reset_now_work;
+ struct work_struct reset_abort_work;
+ unsigned long reset_flags;
+ struct timer_list timer;
+ struct completion done;
+ int ret;
+};
+
+void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (enable)
+ clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+ else
+ set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+}
+
+bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ return !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags);
+}
+
+static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
+ u8 reset_type_sel, u8 sync_resp, bool sync_start)
+{
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+
+ MLX5_SET(mfrl_reg, in, reset_level, reset_level);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_resp, sync_resp);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, sync_start);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1);
+}
+
+static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
+{
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 0);
+ if (err)
+ return err;
+
+ if (reset_level)
+ *reset_level = MLX5_GET(mfrl_reg, out, reset_level);
+ if (reset_type)
+ *reset_type = MLX5_GET(mfrl_reg, out, reset_type);
+
+ return 0;
+}
+
+int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
+{
+ return mlx5_reg_mfrl_query(dev, reset_level, reset_type);
+}
+
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ int err;
+
+ set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true);
+ if (err)
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ return err;
+}
+
+int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
+}
+
+static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ /* if this is the driver that initiated the fw reset, devlink completed the reload */
+ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
+ complete(&fw_reset->done);
+ } else {
+ mlx5_load_one(dev, false);
+ devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
+ BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+ BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
+ }
+}
+
+static void mlx5_sync_reset_reload_work(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_reload_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+ mlx5_enter_error_state(dev, true);
+ mlx5_unload_one(dev, false);
+ err = mlx5_health_wait_pci_up(dev);
+ if (err)
+ mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
+ fw_reset->ret = err;
+ mlx5_fw_reset_complete_reload(dev);
+}
+
+static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ del_timer(&fw_reset->timer);
+}
+
+static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ mlx5_stop_sync_reset_poll(dev);
+ clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
+ if (poll_health)
+ mlx5_start_health_poll(dev);
+}
+
+#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
+static void poll_sync_reset(struct timer_list *t)
+{
+ struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ u32 fatal_error;
+
+ if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ return;
+
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
+
+ if (fatal_error) {
+ mlx5_core_warn(dev, "Got Device Reset\n");
+ mlx5_sync_reset_clear_reset_requested(dev, false);
+ queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ return;
+ }
+
+ mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL));
+}
+
+static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ timer_setup(&fw_reset->timer, poll_sync_reset, 0);
+ fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL);
+ add_timer(&fw_reset->timer);
+}
+
+static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false);
+}
+
+static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
+{
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
+}
+
+static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ mlx5_stop_health_poll(dev, true);
+ set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
+ mlx5_start_sync_reset_poll(dev);
+}
+
+static void mlx5_fw_live_patch_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ fw_live_patch_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ struct mlx5_fw_tracer *tracer;
+
+ mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
+
+ tracer = dev->tracer;
+ if (IS_ERR_OR_NULL(tracer))
+ return;
+
+ if (mlx5_fw_tracer_reload(tracer))
+ mlx5_core_err(dev, "Failed to reload FW tracer\n");
+}
+
+static void mlx5_sync_reset_request_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_request_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+ if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) {
+ err = mlx5_fw_reset_set_reset_sync_nack(dev);
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
+ err ? "Failed" : "Sent");
+ return;
+ }
+ mlx5_sync_reset_set_reset_requested(dev);
+ err = mlx5_fw_reset_set_reset_sync_ack(dev);
+ if (err)
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
+ else
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
+}
+
+#define MLX5_PCI_LINK_UP_TIMEOUT 2000
+
+static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+{
+ struct pci_bus *bridge_bus = dev->pdev->bus;
+ struct pci_dev *bridge = bridge_bus->self;
+ u16 reg16, dev_id, sdev_id;
+ unsigned long timeout;
+ struct pci_dev *sdev;
+ int cap, err;
+ u32 reg32;
+
+ /* Check that all functions under the pci bridge are PFs of
+ * this device otherwise fail this function.
+ */
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+ if (err)
+ return err;
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id);
+ if (err)
+ return err;
+ if (sdev_id != dev_id)
+ return -EPERM;
+ }
+
+ cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
+ if (!cap)
+ return -EOPNOTSUPP;
+
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ pci_save_state(sdev);
+ pci_cfg_access_lock(sdev);
+ }
+ /* PCI link toggle */
+ err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, &reg16);
+ if (err)
+ return err;
+ reg16 |= PCI_EXP_LNKCTL_LD;
+ err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
+ if (err)
+ return err;
+ msleep(500);
+ reg16 &= ~PCI_EXP_LNKCTL_LD;
+ err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
+ if (err)
+ return err;
+
+ /* Check link */
+ err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32);
+ if (err)
+ return err;
+ if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) {
+ mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32);
+ msleep(1000);
+ goto restore;
+ }
+
+ timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT);
+ do {
+ err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
+ if (err)
+ return err;
+ if (reg16 & PCI_EXP_LNKSTA_DLLLA)
+ break;
+ msleep(20);
+ } while (!time_after(jiffies, timeout));
+
+ if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
+ mlx5_core_info(dev, "PCI Link up\n");
+ } else {
+ mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n",
+ reg16, MLX5_PCI_LINK_UP_TIMEOUT);
+ err = -ETIMEDOUT;
+ }
+
+restore:
+ list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+ pci_cfg_access_unlock(sdev);
+ pci_restore_state(sdev);
+ }
+
+ return err;
+}
+
+static void mlx5_sync_reset_now_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_now_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+ mlx5_sync_reset_clear_reset_requested(dev, false);
+
+ mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
+
+ err = mlx5_cmd_fast_teardown_hca(dev);
+ if (err) {
+ mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err);
+ goto done;
+ }
+
+ err = mlx5_pci_link_toggle(dev);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
+ goto done;
+ }
+
+ mlx5_enter_error_state(dev, true);
+ mlx5_unload_one(dev, false);
+done:
+ fw_reset->ret = err;
+ mlx5_fw_reset_complete_reload(dev);
+}
+
+static void mlx5_sync_reset_abort_event(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_abort_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ mlx5_sync_reset_clear_reset_requested(dev, true);
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
+}
+
+static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe)
+{
+ struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe;
+ u8 sync_event_rst_type;
+
+ sync_fw_update_eqe = &eqe->data.sync_fw_update;
+ sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK;
+ switch (sync_event_rst_type) {
+ case MLX5_SYNC_RST_STATE_RESET_REQUEST:
+ queue_work(fw_reset->wq, &fw_reset->reset_request_work);
+ break;
+ case MLX5_SYNC_RST_STATE_RESET_NOW:
+ queue_work(fw_reset->wq, &fw_reset->reset_now_work);
+ break;
+ case MLX5_SYNC_RST_STATE_RESET_ABORT:
+ queue_work(fw_reset->wq, &fw_reset->reset_abort_work);
+ break;
+ }
+}
+
+static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
+ struct mlx5_eqe *eqe = data;
+
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
+ queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+ break;
+ case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
+ mlx5_sync_reset_events_handle(fw_reset, eqe);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+#define MLX5_FW_RESET_TIMEOUT_MSEC 5000
+int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
+{
+ unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC);
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ int err;
+
+ if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
+ mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n",
+ MLX5_FW_RESET_TIMEOUT_MSEC / 1000);
+ err = -ETIMEDOUT;
+ goto out;
+ }
+ err = fw_reset->ret;
+out:
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ return err;
+}
+
+void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT);
+ mlx5_eq_notifier_register(dev, &fw_reset->nb);
+}
+
+void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
+{
+ mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
+}
+
+int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
+
+ if (!fw_reset)
+ return -ENOMEM;
+ fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events");
+ if (!fw_reset->wq) {
+ kfree(fw_reset);
+ return -ENOMEM;
+ }
+
+ fw_reset->dev = dev;
+ dev->priv.fw_reset = fw_reset;
+
+ INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event);
+ INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event);
+ INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
+ INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
+ INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
+
+ init_completion(&fw_reset->done);
+ return 0;
+}
+
+void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ destroy_workqueue(fw_reset->wq);
+ kfree(dev->priv.fw_reset);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
new file mode 100644
index 000000000000..7761ee5fc7d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_FW_RESET_H
+#define __MLX5_FW_RESET_H
+
+#include "mlx5_core.h"
+
+void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable);
+bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev);
+int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type);
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel);
+int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
+
+int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
+void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index b31f769d2df9..54523bed16cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -110,7 +110,7 @@ static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
return rfr && synd;
}
-static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
+u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
{
if (sensor_pci_not_working(dev))
return MLX5_SENSOR_PCI_COMM_ERR;
@@ -173,7 +173,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
* Check again to avoid a redundant 2nd reset. If the fatal erros was
* PCI related a reset won't help.
*/
- fatal_error = check_fatal_sensors(dev);
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
fatal_error == MLX5_SENSOR_NIC_DISABLED ||
fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
@@ -195,7 +195,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
bool err_detected = false;
/* Mark the device as fatal in order to abort FW commands */
- if ((check_fatal_sensors(dev) || force) &&
+ if ((mlx5_health_check_fatal_sensors(dev) || force) &&
dev->state == MLX5_DEVICE_STATE_UP) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
err_detected = true;
@@ -208,7 +208,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
goto unlock;
}
- if (check_fatal_sensors(dev) || force) { /* protected state setting */
+ if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_cmd_flush(dev);
}
@@ -231,7 +231,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "start\n");
- if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
+ if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
/* Get cr-dump and reset FW semaphore */
lock = lock_sem_sw_reset(dev, true);
@@ -308,26 +308,31 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
/* How much time to wait until health resetting the driver (in msecs) */
#define MLX5_RECOVERY_WAIT_MSECS 60000
-static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
+int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
{
unsigned long end;
- mlx5_core_warn(dev, "handling bad device here\n");
- mlx5_handle_bad_state(dev);
end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
while (sensor_pci_not_working(dev)) {
- if (time_after(jiffies, end)) {
- mlx5_core_err(dev,
- "health recovery flow aborted, PCI reads still not working\n");
- return -EIO;
- }
+ if (time_after(jiffies, end))
+ return -ETIMEDOUT;
msleep(100);
}
+ return 0;
+}
+static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
+{
+ mlx5_core_warn(dev, "handling bad device here\n");
+ mlx5_handle_bad_state(dev);
+ if (mlx5_health_wait_pci_up(dev)) {
+ mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
+ return -EIO;
+ }
mlx5_core_err(dev, "starting health recovery flow\n");
mlx5_recover_device(dev);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) ||
- check_fatal_sensors(dev)) {
+ mlx5_health_check_fatal_sensors(dev)) {
mlx5_core_err(dev, "health recovery failed\n");
return -EIO;
}
@@ -696,7 +701,7 @@ static void poll_health(struct timer_list *t)
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
goto out;
- fatal_error = check_fatal_sensors(dev);
+ fatal_error = mlx5_health_check_fatal_sensors(dev);
if (fatal_error && !health->fatal_error) {
mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 1eef66ee849e..cac8f085b16d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -130,14 +130,6 @@ static int mlx5i_flash_device(struct net_device *netdev,
return mlx5e_ethtool_flash_device(priv, flash);
}
-enum mlx5_ptys_width {
- MLX5_PTYS_WIDTH_1X = 1 << 0,
- MLX5_PTYS_WIDTH_2X = 1 << 1,
- MLX5_PTYS_WIDTH_4X = 1 << 2,
- MLX5_PTYS_WIDTH_8X = 1 << 3,
- MLX5_PTYS_WIDTH_12X = 1 << 4,
-};
-
static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
{
switch (width) {
@@ -174,24 +166,6 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
}
}
-static int mlx5i_get_port_settings(struct net_device *netdev,
- u16 *ib_link_width_oper, u16 *ib_proto_oper)
-{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
- int ret;
-
- ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
- if (ret)
- return ret;
-
- *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
- *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
-
- return 0;
-}
-
static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
{
int rate, width;
@@ -209,11 +183,14 @@ static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
static int mlx5i_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *link_ksettings)
{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
u16 ib_link_width_oper;
u16 ib_proto_oper;
int speed, ret;
- ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+ ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper,
+ 1);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 874c70e8cc54..33081b24f10a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -102,7 +102,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
if (ldev->pf[i].netdev == ndev)
return i;
- return -1;
+ return -ENOENT;
}
static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
@@ -271,7 +271,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
bool do_bond, roce_lag;
int err;
- if (!dev0 || !dev1)
+ if (!mlx5_lag_is_ready(ldev))
return;
spin_lock(&lag_lock);
@@ -355,7 +355,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
{
struct net_device *upper = info->upper_dev, *ndev_tmp;
struct netdev_lag_upper_info *lag_upper_info = NULL;
- bool is_bonded;
+ bool is_bonded, is_in_lag, mode_supported;
int bond_status = 0;
int num_slaves = 0;
int idx;
@@ -374,7 +374,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
rcu_read_lock();
for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
- if (idx > -1)
+ if (idx >= 0)
bond_status |= (1 << idx);
num_slaves++;
@@ -391,13 +391,24 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
/* Determine bonding status:
* A device is considered bonded if both its physical ports are slaves
* of the same lag master, and only them.
- * Lag mode must be activebackup or hash.
*/
- is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
- (bond_status == 0x3) &&
- ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
- (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
+ is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
+ if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, PF is configured with more than 64 VFs");
+ return 0;
+ }
+
+ /* Lag mode must be activebackup or hash. */
+ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
+
+ if (is_in_lag && !mode_supported)
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Can't activate LAG offload, TX type isn't supported");
+
+ is_bonded = is_in_lag && mode_supported;
if (tracker->is_bonded != is_bonded) {
tracker->is_bonded = is_bonded;
return 1;
@@ -418,7 +429,7 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
return 0;
idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
- if (idx == -1)
+ if (idx < 0)
return 0;
/* This information is used to determine virtual to physical
@@ -445,6 +456,10 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
ldev = container_of(this, struct mlx5_lag, nb);
+
+ if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
+ return NOTIFY_DONE;
+
tracker = ldev->tracker;
switch (event) {
@@ -493,14 +508,14 @@ static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
kfree(ldev);
}
-static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
- struct mlx5_core_dev *dev,
- struct net_device *netdev)
+static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev,
+ struct net_device *netdev)
{
unsigned int fn = PCI_FUNC(dev->pdev->devfn);
if (fn >= MLX5_MAX_PORTS)
- return;
+ return -EPERM;
spin_lock(&lag_lock);
ldev->pf[fn].dev = dev;
@@ -511,6 +526,8 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
dev->priv.lag = ldev;
spin_unlock(&lag_lock);
+
+ return fn;
}
static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
@@ -537,11 +554,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
- int err;
+ int i, err;
- if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
- !MLX5_CAP_GEN(dev, lag_master) ||
- (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
+ if (!MLX5_CAP_GEN(dev, vport_group_manager))
return;
tmp_dev = mlx5_get_next_phys_dev(dev);
@@ -556,7 +571,18 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
}
}
- mlx5_lag_dev_add_pf(ldev, dev, netdev);
+ if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
+ return;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ tmp_dev = ldev->pf[i].dev;
+ if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) ||
+ MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS)
+ break;
+ }
+
+ if (i >= MLX5_MAX_PORTS)
+ ldev->flags |= MLX5_LAG_FLAG_READY;
if (!ldev->nb.notifier_call) {
ldev->nb.notifier_call = mlx5_lag_netdev_event;
@@ -587,6 +613,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
mlx5_lag_dev_remove_pf(ldev, dev);
+ ldev->flags &= ~MLX5_LAG_FLAG_READY;
+
for (i = 0; i < MLX5_MAX_PORTS; i++)
if (ldev->pf[i].dev)
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index f1068aac6406..8d8cf2d0bc6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -16,6 +16,7 @@ enum {
MLX5_LAG_FLAG_ROCE = 1 << 0,
MLX5_LAG_FLAG_SRIOV = 1 << 1,
MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
+ MLX5_LAG_FLAG_READY = 1 << 3,
};
#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
@@ -59,6 +60,12 @@ __mlx5_lag_is_active(struct mlx5_lag *ldev)
return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
}
+static inline bool
+mlx5_lag_is_ready(struct mlx5_lag *ldev)
+{
+ return ldev->flags & MLX5_LAG_FLAG_READY;
+}
+
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 9e68f5926ab6..88e58ac902de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -11,7 +11,7 @@
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
- if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
+ if (!mlx5_lag_is_ready(ldev))
return false;
return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
@@ -131,7 +131,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
struct net_device *nh_dev = nh->fib_nh_dev;
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
- mlx5_lag_set_port_affinity(ldev, ++i);
+ if (i < 0)
+ i = MLX5_LAG_NORMAL_AFFINITY;
+ else
+ ++i;
+
+ mlx5_lag_set_port_affinity(ldev, i);
}
return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 2d55b7c22c03..c70c1f0ca0c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -150,28 +150,30 @@ static void mlx5_pps_out(struct work_struct *work)
static void mlx5_timestamp_overflow(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
- struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock,
- overflow_work);
+ struct mlx5_core_dev *mdev;
+ struct mlx5_clock *clock;
unsigned long flags;
+ clock = container_of(dwork, struct mlx5_clock, overflow_work);
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
- mlx5_update_clock_info_page(clock->mdev);
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
}
-static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
- const struct timespec64 *ts)
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
u64 ns = timespec64_to_ns(ts);
+ struct mlx5_core_dev *mdev;
unsigned long flags;
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
write_seqlock_irqsave(&clock->lock, flags);
timecounter_init(&clock->tc, &clock->cycles, ns);
- mlx5_update_clock_info_page(clock->mdev);
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
@@ -180,13 +182,12 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
struct ptp_system_timestamp *sts)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
- clock);
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev;
unsigned long flags;
u64 cycles, ns;
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
write_seqlock_irqsave(&clock->lock, flags);
cycles = mlx5_read_internal_timer(mdev, sts);
ns = timecounter_cyc2time(&clock->tc, cycles);
@@ -199,13 +200,14 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev;
unsigned long flags;
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
write_seqlock_irqsave(&clock->lock, flags);
timecounter_adjtime(&clock->tc, delta);
- mlx5_update_clock_info_page(clock->mdev);
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
@@ -213,12 +215,13 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
{
- u64 adj;
- u32 diff;
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev;
unsigned long flags;
int neg_adj = 0;
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
- ptp_info);
+ u32 diff;
+ u64 adj;
+
if (delta < 0) {
neg_adj = 1;
@@ -229,11 +232,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
adj *= delta;
diff = div_u64(adj, 1000000000ULL);
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
clock->nominal_c_mult + diff;
- mlx5_update_clock_info_page(clock->mdev);
+ mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
@@ -431,13 +435,11 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
default:
return -EOPNOTSUPP;
}
-
- return -EOPNOTSUPP;
}
static const struct ptp_clock_info mlx5_ptp_clock_info = {
.owner = THIS_MODULE,
- .name = "mlx5_p2p",
+ .name = "mlx5_ptp",
.max_adj = 100000000,
.n_alarm = 0,
.n_ext_ts = 0,
@@ -465,7 +467,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
{
- struct mlx5_core_dev *mdev = clock->mdev;
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
+
u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
u8 mode;
int err;
@@ -538,20 +541,23 @@ static int mlx5_pps_event(struct notifier_block *nb,
unsigned long type, void *data)
{
struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
- struct mlx5_core_dev *mdev = clock->mdev;
struct ptp_clock_event ptp_event;
u64 cycles_now, cycles_delta;
u64 nsec_now, nsec_delta, ns;
struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
+ struct mlx5_core_dev *mdev;
struct timespec64 ts;
unsigned long flags;
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
switch (clock->ptp_info.pin_config[pin].func) {
case PTP_PF_EXTTS:
ptp_event.index = pin;
- ptp_event.timestamp = timecounter_cyc2time(&clock->tc,
- be64_to_cpu(eqe->data.pps.time_stamp));
+ ptp_event.timestamp =
+ mlx5_timecounter_cyc2time(clock,
+ be64_to_cpu(eqe->data.pps.time_stamp));
if (clock->pps_info.enabled) {
ptp_event.type = PTP_CLOCK_PPSUSR;
ptp_event.pps_times.ts_real =
@@ -574,8 +580,8 @@ static int mlx5_pps_event(struct notifier_block *nb,
cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
clock->cycles.mult);
clock->pps_info.start[pin] = cycles_now + cycles_delta;
- schedule_work(&clock->pps_info.out_work);
write_sequnlock_irqrestore(&clock->lock, flags);
+ schedule_work(&clock->pps_info.out_work);
break;
default:
mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
@@ -605,7 +611,6 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
clock->cycles.shift);
clock->nominal_c_mult = clock->cycles.mult;
clock->cycles.mask = CLOCKSOURCE_MASK(41);
- clock->mdev = mdev;
timecounter_init(&clock->tc, &clock->cycles,
ktime_to_ns(ktime_get_real()));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 4aaca7400fb2..81f2cc4ca1da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -37,6 +37,7 @@ struct mlx5_eq {
struct mlx5_eq_async {
struct mlx5_eq core;
struct notifier_block irq_nb;
+ spinlock_t lock; /* To avoid irq EQ handle races with resiliency flows */
};
struct mlx5_eq_comp {
@@ -77,10 +78,11 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
-void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t);
struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev);
void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
new file mode 100644
index 000000000000..947f346bdc2d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -0,0 +1,911 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2020 Mellanox Technologies.
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/fs.h>
+
+#include "lib/fs_chains.h"
+#include "en/mapping.h"
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "eswitch.h"
+#include "en.h"
+#include "en_tc.h"
+
+#define chains_lock(chains) ((chains)->lock)
+#define chains_ht(chains) ((chains)->chains_ht)
+#define chains_mapping(chains) ((chains)->chains_mapping)
+#define prios_ht(chains) ((chains)->prios_ht)
+#define ft_pool_left(chains) ((chains)->ft_left)
+#define tc_default_ft(chains) ((chains)->tc_default_ft)
+#define tc_end_ft(chains) ((chains)->tc_end_ft)
+#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO)
+
+/* Firmware currently has 4 pool of 4 sizes that it supports (FT_POOLS),
+ * and a virtual memory region of 16M (MLX5_FT_SIZE), this region is duplicated
+ * for each flow table pool. We can allocate up to 16M of each pool,
+ * and we keep track of how much we used via get_next_avail_sz_from_pool.
+ * Firmware doesn't report any of this for now.
+ * ESW_POOL is expected to be sorted from large to small and match firmware
+ * pools.
+ */
+#define FT_SIZE (16 * 1024 * 1024)
+static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024,
+ 1 * 1024 * 1024,
+ 64 * 1024,
+ 128 };
+#define FT_TBL_SZ (64 * 1024)
+
+struct mlx5_fs_chains {
+ struct mlx5_core_dev *dev;
+
+ struct rhashtable chains_ht;
+ struct rhashtable prios_ht;
+ /* Protects above chains_ht and prios_ht */
+ struct mutex lock;
+
+ struct mlx5_flow_table *tc_default_ft;
+ struct mlx5_flow_table *tc_end_ft;
+ struct mapping_ctx *chains_mapping;
+
+ enum mlx5_flow_namespace_type ns;
+ u32 group_num;
+ u32 flags;
+
+ int ft_left[ARRAY_SIZE(FT_POOLS)];
+};
+
+struct fs_chain {
+ struct rhash_head node;
+
+ u32 chain;
+
+ int ref;
+ int id;
+
+ struct mlx5_fs_chains *chains;
+ struct list_head prios_list;
+ struct mlx5_flow_handle *restore_rule;
+ struct mlx5_modify_hdr *miss_modify_hdr;
+};
+
+struct prio_key {
+ u32 chain;
+ u32 prio;
+ u32 level;
+};
+
+struct prio {
+ struct rhash_head node;
+ struct list_head list;
+
+ struct prio_key key;
+
+ int ref;
+
+ struct fs_chain *chain;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_handle *miss_rule;
+};
+
+static const struct rhashtable_params chain_params = {
+ .head_offset = offsetof(struct fs_chain, node),
+ .key_offset = offsetof(struct fs_chain, chain),
+ .key_len = sizeof_field(struct fs_chain, chain),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params prio_params = {
+ .head_offset = offsetof(struct prio, node),
+ .key_offset = offsetof(struct prio, key),
+ .key_len = sizeof_field(struct prio, key),
+ .automatic_shrinking = true,
+};
+
+bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains)
+{
+ return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED;
+}
+
+static bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{
+ return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+}
+
+bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains)
+{
+ return mlx5_chains_prios_supported(chains) &&
+ mlx5_chains_ignore_flow_level_supported(chains);
+}
+
+u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains)
+{
+ if (!mlx5_chains_prios_supported(chains))
+ return 1;
+
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX - 1;
+
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_CHAIN;
+}
+
+u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
+{
+ return mlx5_chains_get_chain_range(chains) + 1;
+}
+
+u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+{
+ if (!mlx5_chains_prios_supported(chains))
+ return 1;
+
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_PRIO;
+}
+
+static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains)
+{
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
+ /* Same value for FDB and NIC RX tables */
+ return FDB_TC_LEVELS_PER_PRIO;
+}
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft)
+{
+ tc_end_ft(chains) = ft;
+}
+
+#define POOL_NEXT_SIZE 0
+static int
+mlx5_chains_get_avail_sz_from_pool(struct mlx5_fs_chains *chains,
+ int desired_size)
+{
+ int i, found_i = -1;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+ if (ft_pool_left(chains)[i] && FT_POOLS[i] > desired_size) {
+ found_i = i;
+ if (desired_size != POOL_NEXT_SIZE)
+ break;
+ }
+ }
+
+ if (found_i != -1) {
+ --ft_pool_left(chains)[found_i];
+ return FT_POOLS[found_i];
+ }
+
+ return 0;
+}
+
+static void
+mlx5_chains_put_sz_to_pool(struct mlx5_fs_chains *chains, int sz)
+{
+ int i;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+ if (sz == FT_POOLS[i]) {
+ ++ft_pool_left(chains)[i];
+ return;
+ }
+ }
+
+ WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz);
+}
+
+static void
+mlx5_chains_init_sz_pool(struct mlx5_fs_chains *chains, u32 ft_max)
+{
+ int i;
+
+ for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--)
+ ft_pool_left(chains)[i] =
+ FT_POOLS[i] <= ft_max ? FT_SIZE / FT_POOLS[i] : 0;
+}
+
+static struct mlx5_flow_table *
+mlx5_chains_create_table(struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ int sz;
+
+ if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED)
+ ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
+ mlx5_chains_get_avail_sz_from_pool(chains, FT_TBL_SZ) :
+ mlx5_chains_get_avail_sz_from_pool(chains, POOL_NEXT_SIZE);
+ if (!sz)
+ return ERR_PTR(-ENOSPC);
+ ft_attr.max_fte = sz;
+
+ /* We use tc_default_ft(chains) as the table's next_ft till
+ * ignore_flow_level is allowed on FT creation and not just for FTEs.
+ * Instead caller should add an explicit miss rule if needed.
+ */
+ ft_attr.next_ft = tc_default_ft(chains);
+
+ /* The root table(chain 0, prio 1, level 0) is required to be
+ * connected to the previous fs_core managed prio.
+ * We always create it, as a managed table, in order to align with
+ * fs_core logic.
+ */
+ if (!mlx5_chains_ignore_flow_level_supported(chains) ||
+ (chain == 0 && prio == 1 && level == 0)) {
+ ft_attr.level = level;
+ ft_attr.prio = prio - 1;
+ ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
+ mlx5_get_fdb_sub_ns(chains->dev, chain) :
+ mlx5_get_flow_namespace(chains->dev, chains->ns);
+ } else {
+ ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = ns_to_chains_fs_prio(chains->ns);
+ /* Firmware doesn't allow us to create another level 0 table,
+ * so we create all unmanaged tables as level 1.
+ *
+ * To connect them, we use explicit miss rules with
+ * ignore_flow_level. Caller is responsible to create
+ * these rules (if needed).
+ */
+ ft_attr.level = 1;
+ ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
+ }
+
+ ft_attr.autogroup.num_reserved_entries = 2;
+ ft_attr.autogroup.max_num_groups = chains->group_num;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
+ (int)PTR_ERR(ft), chain, prio, level, sz);
+ mlx5_chains_put_sz_to_pool(chains, sz);
+ return ft;
+ }
+
+ return ft;
+}
+
+static void
+mlx5_chains_destroy_table(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft)
+{
+ mlx5_chains_put_sz_to_pool(chains, ft->max_fte);
+ mlx5_destroy_flow_table(ft);
+}
+
+static int
+create_chain_restore(struct fs_chain *chain)
+{
+ struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+ char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
+ struct mlx5_fs_chains *chains = chain->chains;
+ enum mlx5e_tc_attr_to_reg chain_to_reg;
+ struct mlx5_modify_hdr *mod_hdr;
+ u32 index;
+ int err;
+
+ if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) ||
+ !mlx5_chains_prios_supported(chains))
+ return 0;
+
+ err = mapping_add(chains_mapping(chains), &chain->chain, &index);
+ if (err)
+ return err;
+ if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
+ /* we got the special default flow tag id, so we won't know
+ * if we actually marked the packet with the restore rule
+ * we create.
+ *
+ * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
+ */
+ err = mapping_add(chains_mapping(chains),
+ &chain->chain, &index);
+ mapping_remove(chains_mapping(chains),
+ MLX5_FS_DEFAULT_FLOW_TAG);
+ if (err)
+ return err;
+ }
+
+ chain->id = index;
+
+ if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) {
+ chain_to_reg = CHAIN_TO_REG;
+ chain->restore_rule = esw_add_restore_rule(esw, chain->id);
+ if (IS_ERR(chain->restore_rule)) {
+ err = PTR_ERR(chain->restore_rule);
+ goto err_rule;
+ }
+ } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) {
+ /* For NIC RX we don't need a restore rule
+ * since we write the metadata to reg_b
+ * that is passed to SW directly.
+ */
+ chain_to_reg = NIC_CHAIN_TO_REG;
+ } else {
+ err = -EINVAL;
+ goto err_rule;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield);
+ MLX5_SET(set_action_in, modact, offset,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset * 8);
+ MLX5_SET(set_action_in, modact, length,
+ mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen * 8);
+ MLX5_SET(set_action_in, modact, data, chain->id);
+ mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns,
+ 1, modact);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ goto err_mod_hdr;
+ }
+ chain->miss_modify_hdr = mod_hdr;
+
+ return 0;
+
+err_mod_hdr:
+ if (!IS_ERR_OR_NULL(chain->restore_rule))
+ mlx5_del_flow_rules(chain->restore_rule);
+err_rule:
+ /* Datapath can't find this mapping, so we can safely remove it */
+ mapping_remove(chains_mapping(chains), chain->id);
+ return err;
+}
+
+static void destroy_chain_restore(struct fs_chain *chain)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+
+ if (!chain->miss_modify_hdr)
+ return;
+
+ if (chain->restore_rule)
+ mlx5_del_flow_rules(chain->restore_rule);
+
+ mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr);
+ mapping_remove(chains_mapping(chains), chain->id);
+}
+
+static struct fs_chain *
+mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+ struct fs_chain *chain_s = NULL;
+ int err;
+
+ chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL);
+ if (!chain_s)
+ return ERR_PTR(-ENOMEM);
+
+ chain_s->chains = chains;
+ chain_s->chain = chain;
+ INIT_LIST_HEAD(&chain_s->prios_list);
+
+ err = create_chain_restore(chain_s);
+ if (err)
+ goto err_restore;
+
+ err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node,
+ chain_params);
+ if (err)
+ goto err_insert;
+
+ return chain_s;
+
+err_insert:
+ destroy_chain_restore(chain_s);
+err_restore:
+ kvfree(chain_s);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_chain(struct fs_chain *chain)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+
+ rhashtable_remove_fast(&chains_ht(chains), &chain->node,
+ chain_params);
+
+ destroy_chain_restore(chain);
+ kvfree(chain);
+}
+
+static struct fs_chain *
+mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain)
+{
+ struct fs_chain *chain_s;
+
+ chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain,
+ chain_params);
+ if (!chain_s) {
+ chain_s = mlx5_chains_create_chain(chains, chain);
+ if (IS_ERR(chain_s))
+ return chain_s;
+ }
+
+ chain_s->ref++;
+
+ return chain_s;
+}
+
+static struct mlx5_flow_handle *
+mlx5_chains_add_miss_rule(struct fs_chain *chain,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5_fs_chains *chains = chain->chains;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act act = {};
+
+ act.flags = FLOW_ACT_NO_APPEND;
+ if (mlx5_chains_ignore_flow_level_supported(chain->chains))
+ act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+
+ act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = next_ft;
+
+ if (next_ft == tc_end_ft(chains) &&
+ chain->chain != mlx5_chains_get_nf_ft_chain(chains) &&
+ mlx5_chains_prios_supported(chains)) {
+ act.modify_hdr = chain->miss_modify_hdr;
+ act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ }
+
+ return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
+}
+
+static int
+mlx5_chains_update_prio_prevs(struct prio *prio,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
+ struct fs_chain *chain = prio->chain;
+ struct prio *pos;
+ int n = 0, err;
+
+ if (prio->key.level)
+ return 0;
+
+ /* Iterate in reverse order until reaching the level 0 rule of
+ * the previous priority, adding all the miss rules first, so we can
+ * revert them if any of them fails.
+ */
+ pos = prio;
+ list_for_each_entry_continue_reverse(pos,
+ &chain->prios_list,
+ list) {
+ miss_rules[n] = mlx5_chains_add_miss_rule(chain,
+ pos->ft,
+ next_ft);
+ if (IS_ERR(miss_rules[n])) {
+ err = PTR_ERR(miss_rules[n]);
+ goto err_prev_rule;
+ }
+
+ n++;
+ if (!pos->key.level)
+ break;
+ }
+
+ /* Success, delete old miss rules, and update the pointers. */
+ n = 0;
+ pos = prio;
+ list_for_each_entry_continue_reverse(pos,
+ &chain->prios_list,
+ list) {
+ mlx5_del_flow_rules(pos->miss_rule);
+
+ pos->miss_rule = miss_rules[n];
+ pos->next_ft = next_ft;
+
+ n++;
+ if (!pos->key.level)
+ break;
+ }
+
+ return 0;
+
+err_prev_rule:
+ while (--n >= 0)
+ mlx5_del_flow_rules(miss_rules[n]);
+
+ return err;
+}
+
+static void
+mlx5_chains_put_chain(struct fs_chain *chain)
+{
+ if (--chain->ref == 0)
+ mlx5_chains_destroy_chain(chain);
+}
+
+static struct prio *
+mlx5_chains_create_prio(struct mlx5_fs_chains *chains,
+ u32 chain, u32 prio, u32 level)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_handle *miss_rule = NULL;
+ struct mlx5_flow_group *miss_group;
+ struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_table *ft;
+ struct prio *prio_s = NULL;
+ struct fs_chain *chain_s;
+ struct list_head *pos;
+ u32 *flow_group_in;
+ int err;
+
+ chain_s = mlx5_chains_get_chain(chains, chain);
+ if (IS_ERR(chain_s))
+ return ERR_CAST(chain_s);
+
+ prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL);
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!prio_s || !flow_group_in) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ /* Chain's prio list is sorted by prio and level.
+ * And all levels of some prio point to the next prio's level 0.
+ * Example list (prio, level):
+ * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
+ * In hardware, we will we have the following pointers:
+ * (3,0) -> (5,0) -> (7,0) -> Slow path
+ * (3,1) -> (5,0)
+ * (5,1) -> (7,0)
+ * (6,1) -> (7,0)
+ */
+
+ /* Default miss for each chain: */
+ next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
+ tc_default_ft(chains) :
+ tc_end_ft(chains);
+ list_for_each(pos, &chain_s->prios_list) {
+ struct prio *p = list_entry(pos, struct prio, list);
+
+ /* exit on first pos that is larger */
+ if (prio < p->key.prio || (prio == p->key.prio &&
+ level < p->key.level)) {
+ /* Get next level 0 table */
+ next_ft = p->key.level == 0 ? p->ft : p->next_ft;
+ break;
+ }
+ }
+
+ ft = mlx5_chains_create_table(chains, chain, prio, level);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto err_create;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
+ ft->max_fte - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ft->max_fte - 1);
+ miss_group = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(miss_group)) {
+ err = PTR_ERR(miss_group);
+ goto err_group;
+ }
+
+ /* Add miss rule to next_ft */
+ miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft);
+ if (IS_ERR(miss_rule)) {
+ err = PTR_ERR(miss_rule);
+ goto err_miss_rule;
+ }
+
+ prio_s->miss_group = miss_group;
+ prio_s->miss_rule = miss_rule;
+ prio_s->next_ft = next_ft;
+ prio_s->chain = chain_s;
+ prio_s->key.chain = chain;
+ prio_s->key.prio = prio;
+ prio_s->key.level = level;
+ prio_s->ft = ft;
+
+ err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node,
+ prio_params);
+ if (err)
+ goto err_insert;
+
+ list_add(&prio_s->list, pos->prev);
+
+ /* Table is ready, connect it */
+ err = mlx5_chains_update_prio_prevs(prio_s, ft);
+ if (err)
+ goto err_update;
+
+ kvfree(flow_group_in);
+ return prio_s;
+
+err_update:
+ list_del(&prio_s->list);
+ rhashtable_remove_fast(&prios_ht(chains), &prio_s->node,
+ prio_params);
+err_insert:
+ mlx5_del_flow_rules(miss_rule);
+err_miss_rule:
+ mlx5_destroy_flow_group(miss_group);
+err_group:
+ mlx5_chains_destroy_table(chains, ft);
+err_create:
+err_alloc:
+ kvfree(prio_s);
+ kvfree(flow_group_in);
+ mlx5_chains_put_chain(chain_s);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains,
+ struct prio *prio)
+{
+ struct fs_chain *chain = prio->chain;
+
+ WARN_ON(mlx5_chains_update_prio_prevs(prio,
+ prio->next_ft));
+
+ list_del(&prio->list);
+ rhashtable_remove_fast(&prios_ht(chains), &prio->node,
+ prio_params);
+ mlx5_del_flow_rules(prio->miss_rule);
+ mlx5_destroy_flow_group(prio->miss_group);
+ mlx5_chains_destroy_table(chains, prio->ft);
+ mlx5_chains_put_chain(chain);
+ kvfree(prio);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level)
+{
+ struct mlx5_flow_table *prev_fts;
+ struct prio *prio_s;
+ struct prio_key key;
+ int l = 0;
+
+ if ((chain > mlx5_chains_get_chain_range(chains) &&
+ chain != mlx5_chains_get_nf_ft_chain(chains)) ||
+ prio > mlx5_chains_get_prio_range(chains) ||
+ level > mlx5_chains_get_level_range(chains))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /* create earlier levels for correct fs_core lookup when
+ * connecting tables.
+ */
+ for (l = 0; l < level; l++) {
+ prev_fts = mlx5_chains_get_table(chains, chain, prio, l);
+ if (IS_ERR(prev_fts)) {
+ prio_s = ERR_CAST(prev_fts);
+ goto err_get_prevs;
+ }
+ }
+
+ key.chain = chain;
+ key.prio = prio;
+ key.level = level;
+
+ mutex_lock(&chains_lock(chains));
+ prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+ prio_params);
+ if (!prio_s) {
+ prio_s = mlx5_chains_create_prio(chains, chain,
+ prio, level);
+ if (IS_ERR(prio_s))
+ goto err_create_prio;
+ }
+
+ ++prio_s->ref;
+ mutex_unlock(&chains_lock(chains));
+
+ return prio_s->ft;
+
+err_create_prio:
+ mutex_unlock(&chains_lock(chains));
+err_get_prevs:
+ while (--l >= 0)
+ mlx5_chains_put_table(chains, chain, prio, l);
+ return ERR_CAST(prio_s);
+}
+
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level)
+{
+ struct prio *prio_s;
+ struct prio_key key;
+
+ key.chain = chain;
+ key.prio = prio;
+ key.level = level;
+
+ mutex_lock(&chains_lock(chains));
+ prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
+ prio_params);
+ if (!prio_s)
+ goto err_get_prio;
+
+ if (--prio_s->ref == 0)
+ mlx5_chains_destroy_prio(chains, prio_s);
+ mutex_unlock(&chains_lock(chains));
+
+ while (level-- > 0)
+ mlx5_chains_put_table(chains, chain, prio, level);
+
+ return;
+
+err_get_prio:
+ mutex_unlock(&chains_lock(chains));
+ WARN_ONCE(1,
+ "Couldn't find table: (chain: %d prio: %d level: %d)",
+ chain, prio, level);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains)
+{
+ return tc_end_ft(chains);
+}
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains)
+{
+ u32 chain, prio, level;
+ int err;
+
+ if (!mlx5_chains_ignore_flow_level_supported(chains)) {
+ err = -EOPNOTSUPP;
+
+ mlx5_core_warn(chains->dev,
+ "Couldn't create global flow table, ignore_flow_level not supported.");
+ goto err_ignore;
+ }
+
+ chain = mlx5_chains_get_chain_range(chains),
+ prio = mlx5_chains_get_prio_range(chains);
+ level = mlx5_chains_get_level_range(chains);
+
+ return mlx5_chains_create_table(chains, chain, prio, level);
+
+err_ignore:
+ return ERR_PTR(err);
+}
+
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft)
+{
+ mlx5_chains_destroy_table(chains, ft);
+}
+
+static struct mlx5_fs_chains *
+mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+ struct mlx5_fs_chains *chains_priv;
+ struct mapping_ctx *mapping;
+ u32 max_flow_counter;
+ int err;
+
+ chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
+ if (!chains_priv)
+ return ERR_PTR(-ENOMEM);
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ mlx5_core_dbg(dev,
+ "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n",
+ max_flow_counter, attr->max_grp_num, attr->max_ft_sz);
+
+ chains_priv->dev = dev;
+ chains_priv->flags = attr->flags;
+ chains_priv->ns = attr->ns;
+ chains_priv->group_num = attr->max_grp_num;
+ tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft;
+
+ mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
+ mlx5_chains_get_chain_range(chains_priv),
+ mlx5_chains_get_prio_range(chains_priv));
+
+ mlx5_chains_init_sz_pool(chains_priv, attr->max_ft_sz);
+
+ err = rhashtable_init(&chains_ht(chains_priv), &chain_params);
+ if (err)
+ goto init_chains_ht_err;
+
+ err = rhashtable_init(&prios_ht(chains_priv), &prio_params);
+ if (err)
+ goto init_prios_ht_err;
+
+ mapping = mapping_create(sizeof(u32), attr->max_restore_tag,
+ true);
+ if (IS_ERR(mapping)) {
+ err = PTR_ERR(mapping);
+ goto mapping_err;
+ }
+ chains_mapping(chains_priv) = mapping;
+
+ mutex_init(&chains_lock(chains_priv));
+
+ return chains_priv;
+
+mapping_err:
+ rhashtable_destroy(&prios_ht(chains_priv));
+init_prios_ht_err:
+ rhashtable_destroy(&chains_ht(chains_priv));
+init_chains_ht_err:
+ kfree(chains_priv);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_chains_cleanup(struct mlx5_fs_chains *chains)
+{
+ mutex_destroy(&chains_lock(chains));
+ mapping_destroy(chains_mapping(chains));
+ rhashtable_destroy(&prios_ht(chains));
+ rhashtable_destroy(&chains_ht(chains));
+
+ kfree(chains);
+}
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{
+ struct mlx5_fs_chains *chains;
+
+ chains = mlx5_chains_init(dev, attr);
+
+ return chains;
+}
+
+void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains)
+{
+ mlx5_chains_cleanup(chains);
+}
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+ u32 *chain_mapping)
+{
+ return mapping_add(chains_mapping(chains), &chain, chain_mapping);
+}
+
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
+{
+ return mapping_remove(chains_mapping(chains), chain_mapping);
+}
+
+int mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag,
+ u32 *chain)
+{
+ int err;
+
+ err = mapping_find(chains_mapping(chains), tag, chain);
+ if (err) {
+ mlx5_core_warn(chains->dev, "Can't find chain for tag: %d\n", tag);
+ return -ENOENT;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
new file mode 100644
index 000000000000..6d5be31b05dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_ESW_CHAINS_H__
+#define __ML5_ESW_CHAINS_H__
+
+#include <linux/mlx5/fs.h>
+
+struct mlx5_fs_chains;
+
+enum mlx5_chains_flags {
+ MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1),
+ MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2),
+};
+
+struct mlx5_chains_attr {
+ enum mlx5_flow_namespace_type ns;
+ u32 flags;
+ u32 max_ft_sz;
+ u32 max_grp_num;
+ struct mlx5_flow_table *default_ft;
+ u32 max_restore_tag;
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+bool
+mlx5_chains_prios_supported(struct mlx5_fs_chains *chains);
+bool
+mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains);
+u32
+mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level);
+void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level);
+
+struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains);
+
+struct mlx5_flow_table *
+mlx5_chains_create_global_table(struct mlx5_fs_chains *chains);
+void
+mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft);
+
+int
+mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
+ u32 *chain_mapping);
+int
+mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains,
+ u32 chain_mapping);
+
+struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr);
+void mlx5_chains_destroy(struct mlx5_fs_chains *chains);
+
+int
+mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, u32 *chain);
+
+void
+mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
+ struct mlx5_flow_table *ft);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
+ u32 level) {};
+
+static inline struct mlx5_flow_table *
+mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); }
+
+static inline struct mlx5_fs_chains *
+mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
+{ return NULL; }
+static inline void
+mlx5_chains_destroy(struct mlx5_fs_chains *chains) {};
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __ML5_ESW_CHAINS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index ce43e3feccd9..8ff207aa1479 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -57,6 +57,7 @@
#include "lib/mpfs.h"
#include "eswitch.h"
#include "devlink.h"
+#include "fw_reset.h"
#include "lib/mlx5.h"
#include "fpga/core.h"
#include "fpga/ipsec.h"
@@ -548,6 +549,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
if (MLX5_CAP_GEN_MAX(dev, dct))
MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
+ if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1);
+
if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
MLX5_SET(cmd_hca_cap,
set_hca_cap,
@@ -739,7 +743,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
pci_set_drvdata(dev->pdev, dev);
dev->bar_addr = pci_resource_start(pdev, 0);
- priv->numa_node = dev_to_node(&dev->pdev->dev);
+ priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
err = mlx5_pci_enable_device(dev);
if (err) {
@@ -832,6 +836,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_eq_cleanup;
}
+ err = mlx5_fw_reset_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize fw reset events\n");
+ goto err_events_cleanup;
+ }
+
mlx5_cq_debugfs_init(dev);
mlx5_init_reserved_gids(dev);
@@ -893,6 +903,8 @@ err_tables_cleanup:
mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cq_debugfs_cleanup(dev);
+ mlx5_fw_reset_cleanup(dev);
+err_events_cleanup:
mlx5_events_cleanup(dev);
err_eq_cleanup:
mlx5_eq_table_cleanup(dev);
@@ -920,6 +932,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
mlx5_cq_debugfs_cleanup(dev);
+ mlx5_fw_reset_cleanup(dev);
mlx5_events_cleanup(dev);
mlx5_eq_table_cleanup(dev);
mlx5_irq_table_cleanup(dev);
@@ -1078,6 +1091,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_fw_tracer;
}
+ mlx5_fw_reset_events_start(dev);
mlx5_hv_vhca_init(dev->hv_vhca);
err = mlx5_rsc_dump_init(dev);
@@ -1139,6 +1153,7 @@ err_fpga_start:
mlx5_rsc_dump_cleanup(dev);
err_rsc_dump:
mlx5_hv_vhca_cleanup(dev->hv_vhca);
+ mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
mlx5_eq_table_destroy(dev);
@@ -1161,6 +1176,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_fpga_device_stop(dev);
mlx5_rsc_dump_cleanup(dev);
mlx5_hv_vhca_cleanup(dev->hv_vhca);
+ mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index fc1649dac11b..8cec85ab419d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -100,6 +100,11 @@ do { \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev)
+{
+ return &dev->pdev->dev;
+}
+
enum {
MLX5_CMD_DATA, /* print command payload only */
MLX5_CMD_TIME, /* print command execution time */
@@ -123,6 +128,8 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_error_sw_reset(struct mlx5_core_dev *dev);
+u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev);
+int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev);
void mlx5_recover_device(struct mlx5_core_dev *dev);
int mlx5_sriov_init(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index f9b798af6335..150638814517 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -238,7 +238,7 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
rb_erase(&fwp->rb_node, root);
if (in_free_list)
list_del(&fwp->list);
- dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK,
+ dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK,
PAGE_SIZE, DMA_BIDIRECTIONAL);
__free_page(fwp->page);
kfree(fwp);
@@ -265,7 +265,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 func_id)
static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
{
- struct device *device = dev->device;
+ struct device *device = mlx5_core_dma_dev(dev);
int nid = dev_to_node(device);
struct page *page;
u64 zero_addr = 1;
@@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
u32 npages;
u32 i = 0;
- if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ if (!mlx5_cmd_is_down(dev))
return mlx5_cmd_exec(dev, in, in_size, out, out_size);
/* No hard feelings, we want our pages back! */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 373981a659c7..6fd974920394 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -115,7 +115,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec)
return 0;
err_request_irq:
- for (; i >= 0; i--) {
+ while (i--) {
struct mlx5_irq *irq = mlx5_irq_get(dev, i);
int irqn = pci_irq_vector(dev->pdev, i);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index e4186e84b3ff..4bb219565c58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -154,24 +154,8 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
sizeof(out), MLX5_REG_MLCR, 0, 1);
}
-int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
- u8 *link_width_oper, u8 local_port)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port);
- if (err)
- return err;
-
- *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
-
-int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
- u8 *proto_oper, u8 local_port)
+int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
+ u16 *proto_oper, u8 local_port)
{
u32 out[MLX5_ST_SZ_DW(ptys_reg)];
int err;
@@ -181,11 +165,12 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
if (err)
return err;
+ *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
*proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
return 0;
}
-EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
+EXPORT_SYMBOL(mlx5_query_ib_port_oper);
/* This function should be used after setting a port register only */
void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index c63f727273d8..7df883686d46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -203,7 +203,6 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_match_param mask = {};
- struct mlx5dr_match_misc3 *misc3;
struct mlx5dr_ste_build *sb;
bool inner, rx;
int idx = 0;
@@ -252,18 +251,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) &&
(dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) {
- ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask,
- dmn, inner, rx);
- if (ret)
- return ret;
+ mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask,
+ dmn, inner, rx);
}
if (dr_mask_is_smac_set(&mask.outer) &&
dr_mask_is_dmac_set(&mask.outer)) {
- ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask,
- inner, rx);
- if (ret)
- return ret;
+ mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask,
+ inner, rx);
}
if (dr_mask_is_smac_set(&mask.outer))
@@ -313,8 +308,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask,
inner, rx);
- misc3 = &mask.misc3;
- if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) &&
+ if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(&mask.misc3) &&
mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) ||
(dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) &&
mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) {
@@ -340,10 +334,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
if (dr_mask_is_smac_set(&mask.inner) &&
dr_mask_is_dmac_set(&mask.inner)) {
- ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++],
- &mask, inner, rx);
- if (ret)
- return ret;
+ mlx5dr_ste_build_eth_l2_src_des(&sb[idx++],
+ &mask, inner, rx);
}
if (dr_mask_is_smac_set(&mask.inner))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 6ec5106bc472..b3c9dc032026 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -242,7 +242,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl);
new_ste = &new_htbl->ste_arr[new_idx];
- if (mlx5dr_ste_not_used_ste(new_ste)) {
+ if (mlx5dr_ste_is_not_used(new_ste)) {
mlx5dr_htbl_get(new_htbl);
list_add_tail(&new_ste->miss_list_node,
mlx5dr_ste_get_miss_list(new_ste));
@@ -335,7 +335,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
for (i = 0; i < cur_entries; i++) {
cur_ste = &cur_htbl->ste_arr[i];
- if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */
+ if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */
continue;
err = dr_rule_rehash_copy_miss_list(matcher,
@@ -791,7 +791,7 @@ again:
miss_list = &cur_htbl->chunk->miss_list[index];
ste = &cur_htbl->ste_arr[index];
- if (mlx5dr_ste_not_used_ste(ste)) {
+ if (mlx5dr_ste_is_not_used(ste)) {
if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl,
ste, ste_location,
hw_ste, miss_list,
@@ -985,31 +985,28 @@ static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec)
static bool dr_rule_skip(enum mlx5dr_domain_type domain,
enum mlx5dr_ste_entry_type ste_type,
struct mlx5dr_match_param *mask,
- struct mlx5dr_match_param *value)
+ struct mlx5dr_match_param *value,
+ u32 flow_source)
{
+ bool rx = ste_type == MLX5DR_STE_TYPE_RX;
+
if (domain != MLX5DR_DOMAIN_TYPE_FDB)
return false;
if (mask->misc.source_port) {
- if (ste_type == MLX5DR_STE_TYPE_RX)
- if (value->misc.source_port != WIRE_PORT)
- return true;
+ if (rx && value->misc.source_port != WIRE_PORT)
+ return true;
- if (ste_type == MLX5DR_STE_TYPE_TX)
- if (value->misc.source_port == WIRE_PORT)
- return true;
+ if (!rx && value->misc.source_port == WIRE_PORT)
+ return true;
}
- /* Metadata C can be used to describe the source vport */
- if (mask->misc2.metadata_reg_c_0) {
- if (ste_type == MLX5DR_STE_TYPE_RX)
- if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT)
- return true;
+ if (rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT)
+ return true;
+
+ if (!rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK)
+ return true;
- if (ste_type == MLX5DR_STE_TYPE_TX)
- if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT)
- return true;
- }
return false;
}
@@ -1038,7 +1035,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
INIT_LIST_HEAD(&nic_rule->rule_members_list);
- if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param))
+ if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param,
+ rule->flow_source))
return 0;
hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
@@ -1173,7 +1171,8 @@ static struct mlx5dr_rule *
dr_rule_create_rule(struct mlx5dr_matcher *matcher,
struct mlx5dr_match_parameters *value,
size_t num_actions,
- struct mlx5dr_action *actions[])
+ struct mlx5dr_action *actions[],
+ u32 flow_source)
{
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_match_param param = {};
@@ -1188,6 +1187,7 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher,
return NULL;
rule->matcher = matcher;
+ rule->flow_source = flow_source;
INIT_LIST_HEAD(&rule->rule_actions_list);
ret = dr_rule_add_action_members(rule, num_actions, actions);
@@ -1232,13 +1232,14 @@ free_rule:
struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
struct mlx5dr_match_parameters *value,
size_t num_actions,
- struct mlx5dr_action *actions[])
+ struct mlx5dr_action *actions[],
+ u32 flow_source)
{
struct mlx5dr_rule *rule;
refcount_inc(&matcher->refcount);
- rule = dr_rule_create_rule(matcher, value, num_actions, actions);
+ rule = dr_rule_create_rule(matcher, value, num_actions, actions, flow_source);
if (!rule)
refcount_dec(&matcher->refcount);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 2ca79b9bde1f..24dede1b0a20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -466,10 +466,10 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
* need to add the bit_mask
*/
for (j = 0; j < num_stes_per_iter; j++) {
- u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
+ struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j];
u32 ste_off = j * DR_STE_SIZE;
- if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
+ if (mlx5dr_ste_is_not_used(ste)) {
memcpy(data + ste_off,
formatted_ste, DR_STE_SIZE);
} else {
@@ -831,7 +831,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
if (!mr)
return NULL;
- dma_device = &mdev->pdev->dev;
+ dma_device = mlx5_core_dma_dev(mdev);
dma_addr = dma_map_single(dma_device, buf, size,
DMA_BIDIRECTIONAL);
err = dma_mapping_error(dma_device, dma_addr);
@@ -860,7 +860,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
mlx5_core_destroy_mkey(mdev, &mr->mkey);
- dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
+ dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
DMA_BIDIRECTIONAL);
kfree(mr);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 00c2f598f034..b01aaec75622 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -155,6 +155,13 @@ static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask)
return byte_mask;
}
+static u8 *mlx5dr_ste_get_tag(u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+
+ return hw_ste->tag;
+}
+
void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask)
{
struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
@@ -549,25 +556,6 @@ void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr)
dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste);
}
-/* The assumption here is that we don't update the ste->hw_ste if it is not
- * used ste, so it will be all zero, checking the next_lu_type.
- */
-bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste)
-{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste;
-
- if (MLX5_GET(ste_general, hw_ste, next_lu_type) ==
- MLX5DR_STE_LU_TYPE_NOP)
- return true;
-
- return false;
-}
-
-bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste)
-{
- return !ste->refcount;
-}
-
/* Init one ste as a pattern for ste data array */
void mlx5dr_ste_set_formatted_ste(u16 gvmi,
struct mlx5dr_domain_rx_tx *nic_dmn,
@@ -728,7 +716,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
{
if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) {
if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
- mlx5dr_err(dmn, "Partial mask source_port is not supported\n");
+ mlx5dr_err(dmn,
+ "Partial mask source_port is not supported\n");
+ return -EINVAL;
+ }
+ if (mask->misc.source_eswitch_owner_vhca_id &&
+ mask->misc.source_eswitch_owner_vhca_id != 0xffff) {
+ mlx5dr_err(dmn,
+ "Partial mask source_eswitch_owner_vhca_id is not supported\n");
return -EINVAL;
}
}
@@ -760,7 +755,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask);
- ret = sb->ste_build_tag_func(value, sb, ste_arr);
+ ret = sb->ste_build_tag_func(value, sb, mlx5dr_ste_get_tag(ste_arr));
if (ret)
return ret;
@@ -778,8 +773,8 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
return 0;
}
-static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value,
- bool inner, u8 *bit_mask)
+static void dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
{
struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
@@ -807,13 +802,6 @@ static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value
MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
mask->svlan_tag = 0;
}
-
- if (mask->cvlan_tag || mask->svlan_tag) {
- pr_info("Invalid c/svlan mask configuration\n");
- return -EINVAL;
- }
-
- return 0;
}
static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec)
@@ -1059,11 +1047,9 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16);
DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0);
@@ -1104,23 +1090,17 @@ static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value,
return 0;
}
-int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx)
+void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
{
- int ret;
-
- ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask);
- if (ret)
- return ret;
+ dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask);
sb->rx = rx;
sb->inner = inner;
sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner);
sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_des_tag;
-
- return 0;
}
static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value,
@@ -1136,11 +1116,9 @@ static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *val
static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
@@ -1176,11 +1154,9 @@ static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *val
static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
@@ -1238,11 +1214,9 @@ static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param
static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0);
DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0);
@@ -1328,12 +1302,10 @@ dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
}
static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
- bool inner, u8 *hw_ste_p)
+ bool inner, u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
struct mlx5dr_match_misc *misc_spec = &value->misc;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid);
DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi);
@@ -1403,16 +1375,14 @@ static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16);
DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0);
- return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
+ return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
}
void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb,
@@ -1440,16 +1410,14 @@ static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16);
DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0);
- return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
+ return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, tag);
}
void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb,
@@ -1495,12 +1463,10 @@ static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc *misc = &value->misc;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16);
DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0);
@@ -1561,11 +1527,9 @@ static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *va
static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit);
@@ -1608,11 +1572,9 @@ static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
@@ -1647,7 +1609,7 @@ void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb,
static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
return 0;
}
@@ -1673,11 +1635,9 @@ static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc2 *misc2_mask = &value->misc2;
- u8 *tag = hw_ste->tag;
if (sb->inner)
DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag);
@@ -1716,11 +1676,9 @@ static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc *misc = &value->misc;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol);
@@ -1781,11 +1739,9 @@ static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value
static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
- u8 *tag = hw_ste->tag;
if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) {
DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
@@ -1903,11 +1859,9 @@ static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask,
static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc3 *misc_3 = &value->misc3;
- u8 *tag = hw_ste->tag;
u32 icmp_header_data;
int dw0_location;
int dw1_location;
@@ -2007,11 +1961,9 @@ static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *val
static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
misc_2_mask, metadata_reg_a);
@@ -2052,11 +2004,9 @@ static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc3 *misc3 = &value->misc3;
- u8 *tag = hw_ste->tag;
if (sb->inner) {
DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num);
@@ -2102,11 +2052,9 @@ dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(struct mlx5dr_match_param *value
static int
dr_ste_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc3 *misc3 = &value->misc3;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag,
outer_vxlan_gpe_flags, misc3,
@@ -2158,11 +2106,9 @@ dr_ste_build_flex_parser_tnl_geneve_bit_mask(struct mlx5dr_match_param *value,
static int
dr_ste_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc *misc = &value->misc;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(flex_parser_tnl_geneve, tag,
geneve_protocol_type, misc, geneve_protocol_type);
@@ -2205,11 +2151,9 @@ static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc2 *misc2 = &value->misc2;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
@@ -2249,11 +2193,9 @@ static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value,
static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc2 *misc2 = &value->misc2;
- u8 *tag = hw_ste->tag;
DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
@@ -2276,38 +2218,25 @@ void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
sb->ste_build_tag_func = &dr_ste_build_register_1_tag;
}
-static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
- u8 *bit_mask)
+static void dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
{
struct mlx5dr_match_misc *misc_mask = &value->misc;
- /* Partial misc source_port is not supported */
- if (misc_mask->source_port && misc_mask->source_port != 0xffff)
- return -EINVAL;
-
- /* Partial misc source_eswitch_owner_vhca_id is not supported */
- if (misc_mask->source_eswitch_owner_vhca_id &&
- misc_mask->source_eswitch_owner_vhca_id != 0xffff)
- return -EINVAL;
-
DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
misc_mask->source_eswitch_owner_vhca_id = 0;
-
- return 0;
}
static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p)
+ u8 *tag)
{
- struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
struct mlx5dr_match_misc *misc = &value->misc;
struct mlx5dr_cmd_vport_cap *vport_cap;
struct mlx5dr_domain *dmn = sb->dmn;
struct mlx5dr_cmd_caps *caps;
u8 *bit_mask = sb->bit_mask;
- u8 *tag = hw_ste->tag;
bool source_gvmi_set;
DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
@@ -2339,19 +2268,15 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
return 0;
}
-int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- struct mlx5dr_domain *dmn,
- bool inner, bool rx)
+void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn,
+ bool inner, bool rx)
{
- int ret;
-
/* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */
sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id;
- ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
- if (ret)
- return ret;
+ dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
sb->rx = rx;
sb->dmn = dmn;
@@ -2359,6 +2284,4 @@ int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP;
sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag;
-
- return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 0883956c58c0..f50f3b107aa3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -194,7 +194,7 @@ struct mlx5dr_ste_build {
u8 bit_mask[DR_STE_SIZE_MASK];
int (*ste_build_tag_func)(struct mlx5dr_match_param *spec,
struct mlx5dr_ste_build *sb,
- u8 *hw_ste_p);
+ u8 *tag);
};
struct mlx5dr_ste_htbl *
@@ -227,7 +227,6 @@ void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi);
void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size);
void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr);
void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask);
-bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste);
bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
u8 ste_location);
void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag);
@@ -266,6 +265,11 @@ static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste)
ste->refcount++;
}
+static inline bool mlx5dr_ste_is_not_used(struct mlx5dr_ste *ste)
+{
+ return !ste->refcount;
+}
+
void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
struct mlx5dr_ste_htbl *next_htbl);
bool mlx5dr_ste_equal_tag(void *src, void *dst);
@@ -284,9 +288,9 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
struct mlx5dr_match_param *value,
u8 *ste_arr);
-int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder,
- struct mlx5dr_match_param *mask,
- bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
@@ -342,10 +346,10 @@ void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb,
void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
struct mlx5dr_match_param *mask,
bool inner, bool rx);
-int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
- struct mlx5dr_match_param *mask,
- struct mlx5dr_domain *dmn,
- bool inner, bool rx);
+void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_domain *dmn,
+ bool inner, bool rx);
void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
/* Actions utils */
@@ -793,6 +797,7 @@ struct mlx5dr_rule {
struct mlx5dr_rule_rx_tx rx;
struct mlx5dr_rule_rx_tx tx;
struct list_head rule_actions_list;
+ u32 flow_source;
};
void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste,
@@ -991,7 +996,6 @@ struct mlx5dr_icm_chunk *
mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
enum mlx5dr_icm_chunk_size chunk_size);
void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk);
-bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste);
int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
struct mlx5dr_domain_rx_tx *nic_dmn,
struct mlx5dr_ste_htbl *htbl,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 9b08eb557a31..96c39a17d026 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -487,7 +487,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher,
&params,
num_actions,
- actions);
+ actions,
+ fte->flow_context.flow_source);
if (!rule) {
err = -EINVAL;
goto free_actions;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 7deaca9ade3b..7914fe3fc68d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -67,7 +67,8 @@ struct mlx5dr_rule *
mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
struct mlx5dr_match_parameters *value,
size_t num_actions,
- struct mlx5dr_action *actions[]);
+ struct mlx5dr_action *actions[],
+ u32 flow_source);
int mlx5dr_rule_destroy(struct mlx5dr_rule *rule);