diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
41 files changed, 3398 insertions, 1291 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c index 53e69edaedde..9f1b1939716a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -37,24 +37,11 @@ #include "mlx5_core.h" #include "fpga/ipsec.h" -void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, - struct mlx5_accel_ipsec_sa *cmd) -{ - if (!MLX5_IPSEC_DEV(mdev)) - return ERR_PTR(-EOPNOTSUPP); - - return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd); -} - -int mlx5_accel_ipsec_sa_cmd_wait(void *ctx) -{ - return mlx5_fpga_ipsec_sa_cmd_wait(ctx); -} - u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return mlx5_fpga_ipsec_device_caps(mdev); } +EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps); unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) { @@ -67,6 +54,21 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, return mlx5_fpga_ipsec_counters_read(mdev, counters, count); } +void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr, + spi, is_ipv6); +} + +void mlx5_accel_esp_free_hw_context(void *context) +{ + mlx5_fpga_ipsec_delete_sa_ctx(context); +} + int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) { return mlx5_fpga_ipsec_init(mdev); @@ -76,3 +78,32 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { mlx5_fpga_ipsec_cleanup(mdev); } + +struct mlx5_accel_esp_xfrm * +mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags) +{ + struct mlx5_accel_esp_xfrm *xfrm; + + xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags); + if (IS_ERR(xfrm)) + return xfrm; + + xfrm->mdev = mdev; + return xfrm; +} +EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm); + +void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) +{ + mlx5_fpga_esp_destroy_xfrm(xfrm); +} +EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm); + +int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + return mlx5_fpga_esp_modify_xfrm(xfrm, attrs); +} +EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h index d6e20fea9554..024dbd22a89b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -34,89 +34,25 @@ #ifndef __MLX5_ACCEL_IPSEC_H__ #define __MLX5_ACCEL_IPSEC_H__ -#ifdef CONFIG_MLX5_ACCEL - #include <linux/mlx5/driver.h> +#include <linux/mlx5/accel.h> -enum { - MLX5_ACCEL_IPSEC_DEVICE = BIT(1), - MLX5_ACCEL_IPSEC_IPV6 = BIT(2), - MLX5_ACCEL_IPSEC_ESP = BIT(3), - MLX5_ACCEL_IPSEC_LSO = BIT(4), -}; - -#define MLX5_IPSEC_SADB_IP_AH BIT(7) -#define MLX5_IPSEC_SADB_IP_ESP BIT(6) -#define MLX5_IPSEC_SADB_SA_VALID BIT(5) -#define MLX5_IPSEC_SADB_SPI_EN BIT(4) -#define MLX5_IPSEC_SADB_DIR_SX BIT(3) -#define MLX5_IPSEC_SADB_IPV6 BIT(2) - -enum { - MLX5_IPSEC_CMD_ADD_SA = 0, - MLX5_IPSEC_CMD_DEL_SA = 1, -}; - -enum mlx5_accel_ipsec_enc_mode { - MLX5_IPSEC_SADB_MODE_NONE = 0, - MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1, - MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3, -}; +#ifdef CONFIG_MLX5_ACCEL #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ - MLX5_ACCEL_IPSEC_DEVICE) - -struct mlx5_accel_ipsec_sa { - __be32 cmd; - u8 key_enc[32]; - u8 key_auth[32]; - __be32 sip[4]; - __be32 dip[4]; - union { - struct { - __be32 reserved; - u8 salt_iv[8]; - __be32 salt; - } __packed gcm; - struct { - u8 salt[16]; - } __packed cbc; - }; - __be32 spi; - __be32 sw_sa_handle; - __be16 tfclen; - u8 enc_mode; - u8 sip_masklen; - u8 dip_masklen; - u8 flags; - u8 reserved[2]; -} __packed; - -/** - * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command - * @mdev: mlx5 device - * @cmd: command to execute - * May be called from atomic context. Returns context pointer, or error - * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic - * context, to cleanup the context pointer - */ -void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, - struct mlx5_accel_ipsec_sa *cmd); - -/** - * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion - * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec - * Sleeps (killable) until command execution is complete. - * Returns the command result, or -EINTR if killed - */ -int mlx5_accel_ipsec_sa_cmd_wait(void *context); - -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); + MLX5_ACCEL_IPSEC_CAP_DEVICE) unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, unsigned int count); +void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6); +void mlx5_accel_esp_free_hw_context(void *context); + int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); @@ -124,6 +60,20 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); #define MLX5_IPSEC_DEV(mdev) false +static inline void * +mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + return NULL; +} + +static inline void mlx5_accel_esp_free_hw_context(void *context) +{ +} + static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) { return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 47239bf7bf43..323ffe8bf7e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -71,19 +71,24 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, } int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_buf *buf, int node) + struct mlx5_frag_buf *buf, int node) { dma_addr_t t; buf->size = size; buf->npages = 1; buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; - buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size, - &t, node); - if (!buf->direct.buf) + + buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL); + if (!buf->frags) return -ENOMEM; - buf->direct.map = t; + buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size, + &t, node); + if (!buf->frags->buf) + goto err_out; + + buf->frags->map = t; while (t & ((1 << buf->page_shift) - 1)) { --buf->page_shift; @@ -91,18 +96,24 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, } return 0; +err_out: + kfree(buf->frags); + return -ENOMEM; } -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf) +int mlx5_buf_alloc(struct mlx5_core_dev *dev, + int size, struct mlx5_frag_buf *buf) { return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node); } -EXPORT_SYMBOL_GPL(mlx5_buf_alloc); +EXPORT_SYMBOL(mlx5_buf_alloc); -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) { - dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, - buf->direct.map); + dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf, + buf->frags->map); + + kfree(buf->frags); } EXPORT_SYMBOL_GPL(mlx5_buf_free); @@ -147,6 +158,7 @@ err_free_buf: err_out: return -ENOMEM; } +EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) { @@ -162,6 +174,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) } kfree(buf->frags); } +EXPORT_SYMBOL_GPL(mlx5_frag_buf_free); static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, int node) @@ -275,13 +288,13 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) } EXPORT_SYMBOL_GPL(mlx5_db_free); -void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) +void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas) { u64 addr; int i; for (i = 0; i < buf->npages; i++) { - addr = buf->direct.map + (i << buf->page_shift); + addr = buf->frags->map + (i << buf->page_shift); pas[i] = cpu_to_be64(addr); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e9a1fbcc4adf..21cd1703a862 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -359,6 +359,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: + case MLX5_CMD_OP_QUERY_VNIC_ENV: case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: @@ -501,6 +502,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT); MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID); MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY); + MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV); MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER); MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); @@ -1802,7 +1804,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) cmd->checksum_disabled = 1; cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; - cmd->bitmask = (1 << cmd->max_reg_cmds) - 1; + cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1; cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; if (cmd->cmdif_rev > CMD_IF_REV) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 1016e05c7ec7..a4179122a279 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -58,8 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data) tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); mcq->tasklet_ctx.comp(mcq); - if (refcount_dec_and_test(&mcq->refcount)) - complete(&mcq->free); + mlx5_cq_put(mcq); if (time_after(jiffies, end)) break; } @@ -80,69 +79,19 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) * still arrive. */ if (list_empty_careful(&cq->tasklet_ctx.list)) { - refcount_inc(&cq->refcount); + mlx5_cq_hold(cq); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); } spin_unlock_irqrestore(&tasklet_ctx->lock, flags); } -void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) -{ - struct mlx5_core_cq *cq; - struct mlx5_cq_table *table = &dev->priv.cq_table; - - spin_lock(&table->lock); - cq = radix_tree_lookup(&table->tree, cqn); - if (likely(cq)) - refcount_inc(&cq->refcount); - spin_unlock(&table->lock); - - if (!cq) { - mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn); - return; - } - - ++cq->arm_sn; - - cq->comp(cq); - - if (refcount_dec_and_test(&cq->refcount)) - complete(&cq->free); -} - -void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) -{ - struct mlx5_cq_table *table = &dev->priv.cq_table; - struct mlx5_core_cq *cq; - - spin_lock(&table->lock); - - cq = radix_tree_lookup(&table->tree, cqn); - if (cq) - refcount_inc(&cq->refcount); - - spin_unlock(&table->lock); - - if (!cq) { - mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn); - return; - } - - cq->event(cq, event_type); - - if (refcount_dec_and_test(&cq->refcount)) - complete(&cq->free); -} - int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen) { - struct mlx5_cq_table *table = &dev->priv.cq_table; + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); + u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; - u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; - int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), - c_eqn); struct mlx5_eq *eq; int err; @@ -159,6 +108,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn = MLX5_GET(create_cq_out, out, cqn); cq->cons_index = 0; cq->arm_sn = 0; + cq->eq = eq; refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) @@ -167,12 +117,16 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->tasklet_ctx.priv = &eq->tasklet_ctx; INIT_LIST_HEAD(&cq->tasklet_ctx.list); - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, cq->cqn, cq); - spin_unlock_irq(&table->lock); + /* Add to comp EQ CQ tree to recv comp events */ + err = mlx5_eq_add_cq(eq, cq); if (err) goto err_cmd; + /* Add to async EQ CQ tree to recv async events */ + err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq); + if (err) + goto err_cq_add; + cq->pid = current->pid; err = mlx5_debug_cq_add(dev, cq); if (err) @@ -183,6 +137,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; +err_cq_add: + mlx5_eq_del_cq(eq, cq); err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); @@ -195,23 +151,17 @@ EXPORT_SYMBOL(mlx5_core_create_cq); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) { - struct mlx5_cq_table *table = &dev->priv.cq_table; u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; - struct mlx5_core_cq *tmp; int err; - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, cq->cqn); - spin_unlock_irq(&table->lock); - if (!tmp) { - mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn); - return -EINVAL; - } - if (tmp != cq) { - mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn); - return -EINVAL; - } + err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq); + if (err) + return err; + + err = mlx5_eq_del_cq(cq->eq, cq); + if (err) + return err; MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); @@ -222,8 +172,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) synchronize_irq(cq->irqn); mlx5_debug_cq_remove(dev, cq); - if (refcount_dec_and_test(&cq->refcount)) - complete(&cq->free); + mlx5_cq_put(cq); wait_for_completion(&cq->free); return 0; @@ -270,21 +219,3 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); } EXPORT_SYMBOL(mlx5_core_modify_cq_moderation); - -int mlx5_init_cq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_cq_table *table = &dev->priv.cq_table; - int err; - - memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); - err = mlx5_cq_debugfs_init(dev); - - return err; -} - -void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev) -{ - mlx5_cq_debugfs_cleanup(dev); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 17b723218b0c..b994b80d5714 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf) } EXPORT_SYMBOL(mlx5_unregister_interface); +void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) +{ + mutex_lock(&mlx5_intf_mutex); + mlx5_remove_dev_by_protocol(mdev, protocol); + mlx5_add_dev_by_protocol(mdev, protocol); + mutex_unlock(&mlx5_intf_mutex); +} + void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) { struct mlx5_priv *priv = &mdev->priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index fd509160c8f6..d93ff567b40d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -246,6 +246,9 @@ const char *parse_fs_dst(struct trace_seq *p, case MLX5_FLOW_DESTINATION_TYPE_COUNTER: trace_seq_printf(p, "counter_id=%u\n", counter_id); break; + case MLX5_FLOW_DESTINATION_TYPE_PORT: + trace_seq_printf(p, "port\n"); + break; } trace_seq_putc(p, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 80eef4163f52..09f178a3fcab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -136,6 +136,8 @@ TRACE_EVENT(mlx5_fs_del_fg, {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\ {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} TRACE_EVENT(mlx5_fs_set_fte, @@ -163,9 +165,9 @@ TRACE_EVENT(mlx5_fs_set_fte, fs_get_obj(__entry->fg, fte->node.parent); __entry->group_index = __entry->fg->id; __entry->index = fte->index; - __entry->action = fte->action; + __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->flow_tag; + __entry->flow_tag = fte->action.flow_tag; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4c9360b25532..30cad07be2b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -57,24 +57,12 @@ #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) -#define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu)) -#define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu)) +#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) +#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) #define MLX5E_MAX_DSCP 64 #define MLX5E_MAX_NUM_TC 8 -#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 -#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa -#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd - -#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x1 -#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa -#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd - -#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 -#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 -#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 - #define MLX5_RX_HEADROOM NET_SKB_PAD #define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -93,15 +81,31 @@ #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) -#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ - MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) -#define MLX5E_REQUIRED_MTTS(wqes) \ - (wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) -#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX) +#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) +#define MLX5E_MAX_RQ_NUM_MTTS \ + ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ +#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \ + (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \ + (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \ + (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU)) + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x1 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \ + MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW) + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 -#define MLX5_UMR_ALIGN (2048) #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (256) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) @@ -124,9 +128,15 @@ #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ +#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ -#define MLX5E_ICOSQ_MAX_WQEBBS \ - (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) +#define MLX5E_UMR_WQE_INLINE_SZ \ + (sizeof(struct mlx5e_umr_wqe) + \ + ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \ + MLX5_UMR_MTT_ALIGNMENT)) +#define MLX5E_UMR_WQEBBS \ + (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) +#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_DS_COUNT \ @@ -156,26 +166,6 @@ static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) } } -static inline int mlx5_min_log_rq_size(int wq_type) -{ - switch (wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; - default: - return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; - } -} - -static inline int mlx5_max_log_rq_size(int wq_type) -{ - switch (wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW; - default: - return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; - } -} - static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? @@ -198,7 +188,7 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; - struct mlx5_wqe_data_seg data; + struct mlx5_mtt inline_mtts[0]; }; extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; @@ -207,12 +197,14 @@ static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { "rx_cqe_moder", "tx_cqe_moder", "rx_cqe_compress", + "rx_striding_rq", }; enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), + MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ @@ -232,10 +224,7 @@ enum mlx5e_priv_flag { struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; - u16 rq_headroom; - u8 mpwqe_log_stride_sz; - u8 mpwqe_log_num_strides; - u8 log_rq_size; + u8 log_rq_mtu_frames; u16 num_channels; u8 num_tc; bool rx_cqe_compress_def; @@ -243,7 +232,6 @@ struct mlx5e_params { struct net_dim_cq_moder tx_cq_moderation; bool lro_en; u32 lro_wqe_sz; - u16 tx_max_inline; u8 tx_min_inline_mode; u8 rss_hfunc; u8 toeplitz_hash_key[40]; @@ -254,6 +242,8 @@ struct mlx5e_params { u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; + unsigned int sw_mtu; + int hard_mtu; }; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -336,6 +326,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, }; @@ -369,7 +360,6 @@ struct mlx5e_txqsq { void __iomem *uar_map; struct netdev_queue *txq; u32 sqn; - u16 max_inline; u8 min_inline_mode; u16 edge; struct device *pdev; @@ -383,6 +373,10 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; + struct mlx5e_txqsq_recover { + struct work_struct recover_work; + u64 last_recover; + } recover; } ____cacheline_aligned_in_smp; struct mlx5e_xdpsq { @@ -432,7 +426,6 @@ struct mlx5e_icosq { void __iomem *uar_map; u32 sqn; u16 edge; - __be32 mkey_be; unsigned long state; /* control path */ @@ -457,16 +450,13 @@ struct mlx5e_wqe_frag_info { }; struct mlx5e_umr_dma_info { - __be64 *mtt; - dma_addr_t mtt_addr; struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; - struct mlx5e_umr_wqe wqe; }; struct mlx5e_mpw_info { struct mlx5e_umr_dma_info umr; u16 consumed_strides; - u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; + DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); }; /* a single cache unit is capable to serve one napi call (for non-striding rq) @@ -483,9 +473,16 @@ struct mlx5e_page_cache { struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); +typedef struct sk_buff * +(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); +enum mlx5e_rq_flag { + MLX5E_RQ_FLAG_XDP_XMIT = BIT(0), +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -496,12 +493,12 @@ struct mlx5e_rq { u32 frag_sz; /* max possible skb frag_sz */ union { bool page_reuse; - bool xdp_xmit; }; } wqe; struct { + struct mlx5e_umr_wqe umr_wqe; struct mlx5e_mpw_info *info; - void *mtt_no_align; + mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq; u16 num_strides; u8 log_stride_sz; bool umr_in_progress; @@ -533,7 +530,9 @@ struct mlx5e_rq { /* XDP */ struct bpf_prog *xdp_prog; + unsigned int hw_mtu; struct mlx5e_xdpsq xdpsq; + DECLARE_BITMAP(flags, 8); /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -766,7 +765,6 @@ struct mlx5e_priv { struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; - int hard_mtu; struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; @@ -781,7 +779,8 @@ struct mlx5e_priv { struct net_device *netdev; struct mlx5e_stats stats; struct hwtstamp_config tstamp; - u16 q_counter; + u16 q_counter; + u16 drop_rq_q_counter; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; #endif @@ -831,6 +830,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); + void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); @@ -840,6 +843,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx); void mlx5e_update_stats(struct mlx5e_priv *priv); @@ -916,9 +925,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - u8 rq_type); + struct mlx5e_params *params); static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { @@ -970,11 +979,6 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); } -static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) -{ - return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; @@ -1010,7 +1014,6 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); #endif -u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in, int inlen); void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, @@ -1061,7 +1064,6 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); int mlx5e_bits_invert(unsigned long a, int size); @@ -1102,7 +1104,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - u16 max_channels); + u16 max_channels, u16 mtu); u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index bac5103efad3..cf58c9637904 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -38,17 +38,24 @@ #include <linux/module.h> #include "en.h" -#include "accel/ipsec.h" #include "en_accel/ipsec.h" #include "en_accel/ipsec_rxtx.h" -struct mlx5e_ipsec_sa_entry { - struct hlist_node hlist; /* Item in SADB_RX hashtable */ - unsigned int handle; /* Handle in SADB_RX */ - struct xfrm_state *x; - struct mlx5e_ipsec *ipsec; - void *context; -}; + +static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa; + + if (!x) + return NULL; + + sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + if (!sa) + return NULL; + + WARN_ON(sa->x != x); + return sa; +} struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec, unsigned int handle) @@ -74,18 +81,16 @@ static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry) unsigned long flags; int ret; - spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL); if (ret < 0) - goto out; + return ret; + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); sa_entry->handle = ret; hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle); - ret = 0; - -out: spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); - return ret; + + return 0; } static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry) @@ -101,87 +106,99 @@ static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry) static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5e_ipsec *ipsec = sa_entry->ipsec; - unsigned long flags; - /* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */ - synchronize_rcu(); - spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + /* xfrm already doing sync rcu between del and free callbacks */ + ida_simple_remove(&ipsec->halloc, sa_entry->handle); - spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); } -static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x) +static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) { - unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4; - - switch (key_len) { - case 16: - return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128; - case 32: - return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128; - default: - netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n", - key_len, x->aead->alg_name); - return -1; + struct xfrm_replay_state_esn *replay_esn; + u32 seq_bottom; + u8 overlap; + u32 *esn; + + if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { + sa_entry->esn_state.trigger = 0; + return false; + } + + replay_esn = sa_entry->x->replay_esn; + seq_bottom = replay_esn->seq - replay_esn->replay_window + 1; + overlap = sa_entry->esn_state.overlap; + + sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, + htonl(seq_bottom)); + esn = &sa_entry->esn_state.esn; + + sa_entry->esn_state.trigger = 1; + if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { + ++(*esn); + sa_entry->esn_state.overlap = 0; + return true; + } else if (unlikely(!overlap && + (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) { + sa_entry->esn_state.overlap = 1; + return true; } + + return false; } -static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry, - struct mlx5_accel_ipsec_sa *hw_sa) +static void +mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) { struct xfrm_state *x = sa_entry->x; + struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm; struct aead_geniv_ctx *geniv_ctx; - unsigned int crypto_data_len; struct crypto_aead *aead; - unsigned int key_len; + unsigned int crypto_data_len, key_len; int ivsize; - memset(hw_sa, 0, sizeof(*hw_sa)); - - if (op == MLX5_IPSEC_CMD_ADD_SA) { - crypto_data_len = (x->aead->alg_key_len + 7) / 8; - key_len = crypto_data_len - 4; /* 4 bytes salt at end */ - aead = x->data; - geniv_ctx = crypto_aead_ctx(aead); - ivsize = crypto_aead_ivsize(aead); - - memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len); - /* Duplicate 128 bit key twice according to HW layout */ - if (key_len == 16) - memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len); - memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize); - hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len)); - } + memset(attrs, 0, sizeof(*attrs)); - hw_sa->cmd = htonl(op); - hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN; - if (x->props.family == AF_INET) { - hw_sa->sip[3] = x->props.saddr.a4; - hw_sa->dip[3] = x->id.daddr.a4; - hw_sa->sip_masklen = 32; - hw_sa->dip_masklen = 32; - } else { - memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip)); - memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip)); - hw_sa->sip_masklen = 128; - hw_sa->dip_masklen = 128; - hw_sa->flags |= MLX5_IPSEC_SADB_IPV6; - } - hw_sa->spi = x->id.spi; - hw_sa->sw_sa_handle = htonl(sa_entry->handle); - switch (x->id.proto) { - case IPPROTO_ESP: - hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP; - break; - case IPPROTO_AH: - hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH; - break; - default: - break; + /* key */ + crypto_data_len = (x->aead->alg_key_len + 7) / 8; + key_len = crypto_data_len - 4; /* 4 bytes salt at end */ + + memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len); + aes_gcm->key_len = key_len * 8; + + /* salt and seq_iv */ + aead = x->data; + geniv_ctx = crypto_aead_ctx(aead); + ivsize = crypto_aead_ivsize(aead); + memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize); + memcpy(&aes_gcm->salt, x->aead->alg_key + key_len, + sizeof(aes_gcm->salt)); + + /* iv len */ + aes_gcm->icv_len = x->aead->alg_icv_len; + + /* esn */ + if (sa_entry->esn_state.trigger) { + attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + attrs->esn = sa_entry->esn_state.esn; + if (sa_entry->esn_state.overlap) + attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; } - hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x); - if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) - hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX; + + /* rx handle */ + attrs->sa_handle = sa_entry->handle; + + /* algo type */ + attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + + /* action */ + attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ? + MLX5_ACCEL_ESP_ACTION_ENCRYPT : + MLX5_ACCEL_ESP_ACTION_DECRYPT; + /* flags */ + attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ? + MLX5_ACCEL_ESP_FLAGS_TRANSPORT : + MLX5_ACCEL_ESP_FLAGS_TUNNEL; } static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) @@ -203,7 +220,9 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) netdev_info(netdev, "Cannot offload compressed xfrm states\n"); return -EINVAL; } - if (x->props.flags & XFRM_STATE_ESN) { + if (x->props.flags & XFRM_STATE_ESN && + !(mlx5_accel_ipsec_device_caps(priv->mdev) & + MLX5_ACCEL_IPSEC_CAP_ESN)) { netdev_info(netdev, "Cannot offload ESN xfrm states\n"); return -EINVAL; } @@ -251,7 +270,8 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) return -EINVAL; } if (x->props.family == AF_INET6 && - !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) { + !(mlx5_accel_ipsec_device_caps(priv->mdev) & + MLX5_ACCEL_IPSEC_CAP_IPV6)) { netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n"); return -EINVAL; } @@ -262,9 +282,10 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; struct net_device *netdev = x->xso.dev; - struct mlx5_accel_ipsec_sa hw_sa; + struct mlx5_accel_esp_xfrm_attrs attrs; struct mlx5e_priv *priv; - void *context; + __be32 saddr[4] = {0}, daddr[4] = {0}, spi; + bool is_ipv6 = false; int err; priv = netdev_priv(netdev); @@ -291,22 +312,49 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x) netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err); goto err_entry; } + } else { + sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ? + mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv; } - mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa); - context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa); - if (IS_ERR(context)) { - err = PTR_ERR(context); + /* check esn */ + mlx5e_ipsec_update_esn_state(sa_entry); + + /* create xfrm */ + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); + sa_entry->xfrm = + mlx5_accel_esp_create_xfrm(priv->mdev, &attrs, + MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA); + if (IS_ERR(sa_entry->xfrm)) { + err = PTR_ERR(sa_entry->xfrm); goto err_sadb_rx; } - err = mlx5_accel_ipsec_sa_cmd_wait(context); - if (err) - goto err_sadb_rx; + /* create hw context */ + if (x->props.family == AF_INET) { + saddr[3] = x->props.saddr.a4; + daddr[3] = x->id.daddr.a4; + } else { + memcpy(saddr, x->props.saddr.a6, sizeof(saddr)); + memcpy(daddr, x->id.daddr.a6, sizeof(daddr)); + is_ipv6 = true; + } + spi = x->id.spi; + sa_entry->hw_context = + mlx5_accel_esp_create_hw_context(priv->mdev, + sa_entry->xfrm, + saddr, daddr, spi, + is_ipv6); + if (IS_ERR(sa_entry->hw_context)) { + err = PTR_ERR(sa_entry->hw_context); + goto err_xfrm; + } x->xso.offload_handle = (unsigned long)sa_entry; goto out; +err_xfrm: + mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm); err_sadb_rx: if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { mlx5e_ipsec_sadb_rx_del(sa_entry); @@ -320,43 +368,26 @@ out: static void mlx5e_xfrm_del_state(struct xfrm_state *x) { - struct mlx5e_ipsec_sa_entry *sa_entry; - struct mlx5_accel_ipsec_sa hw_sa; - void *context; + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - if (!x->xso.offload_handle) + if (!sa_entry) return; - sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; - WARN_ON(sa_entry->x != x); - if (x->xso.flags & XFRM_OFFLOAD_INBOUND) mlx5e_ipsec_sadb_rx_del(sa_entry); - - mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa); - context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa); - if (IS_ERR(context)) - return; - - sa_entry->context = context; } static void mlx5e_xfrm_free_state(struct xfrm_state *x) { - struct mlx5e_ipsec_sa_entry *sa_entry; - int res; + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - if (!x->xso.offload_handle) + if (!sa_entry) return; - sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; - WARN_ON(sa_entry->x != x); - - res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context); - sa_entry->context = NULL; - if (res) { - /* Leftover object will leak */ - return; + if (sa_entry->hw_context) { + flush_workqueue(sa_entry->ipsec->wq); + mlx5_accel_esp_free_hw_context(sa_entry->hw_context); + mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm); } if (x->xso.flags & XFRM_OFFLOAD_INBOUND) @@ -383,6 +414,14 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv) ida_init(&ipsec->halloc); ipsec->en_priv = priv; ipsec->en_priv->ipsec = ipsec; + ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) & + MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER); + ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, + priv->netdev->name); + if (!ipsec->wq) { + kfree(ipsec); + return -ENOMEM; + } netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); return 0; } @@ -394,6 +433,9 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) if (!ipsec) return; + drain_workqueue(ipsec->wq); + destroy_workqueue(ipsec->wq); + ida_destroy(&ipsec->halloc); kfree(ipsec); priv->ipsec = NULL; @@ -414,11 +456,58 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) return true; } +struct mlx5e_ipsec_modify_state_work { + struct work_struct work; + struct mlx5_accel_esp_xfrm_attrs attrs; + struct mlx5e_ipsec_sa_entry *sa_entry; +}; + +static void _update_xfrm_state(struct work_struct *work) +{ + int ret; + struct mlx5e_ipsec_modify_state_work *modify_work = + container_of(work, struct mlx5e_ipsec_modify_state_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry; + + ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm, + &modify_work->attrs); + if (ret) + netdev_warn(sa_entry->ipsec->en_priv->netdev, + "Not an IPSec offload device\n"); + + kfree(modify_work); +} + +static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_ipsec_modify_state_work *modify_work; + bool need_update; + + if (!sa_entry) + return; + + need_update = mlx5e_ipsec_update_esn_state(sa_entry); + if (!need_update) + return; + + modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC); + if (!modify_work) + return; + + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs); + modify_work->sa_entry = sa_entry; + + INIT_WORK(&modify_work->work, _update_xfrm_state); + WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work)); +} + static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { .xdo_dev_state_add = mlx5e_xfrm_add_state, .xdo_dev_state_delete = mlx5e_xfrm_del_state, .xdo_dev_state_free = mlx5e_xfrm_free_state, .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, + .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, }; void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) @@ -429,7 +518,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) if (!priv->ipsec) return; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) || + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || !MLX5_CAP_ETH(mdev, swp)) { mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); return; @@ -448,7 +537,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) netdev->features |= NETIF_F_HW_ESP_TX_CSUM; netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) || + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) || !MLX5_CAP_ETH(mdev, swp_lso)) { mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 56e00baf16cc..1198fc1eba4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -40,7 +40,11 @@ #include <net/xfrm.h> #include <linux/idr.h> +#include "accel/ipsec.h" + #define MLX5E_IPSEC_SADB_RX_BITS 10 +#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L + #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) #define MLX5E_METADATA_ETHER_LEN 8 @@ -77,10 +81,30 @@ struct mlx5e_ipsec_stats { struct mlx5e_ipsec { struct mlx5e_priv *en_priv; DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); + bool no_trailer; spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */ struct ida halloc; struct mlx5e_ipsec_sw_stats sw_stats; struct mlx5e_ipsec_stats stats; + struct workqueue_struct *wq; +}; + +struct mlx5e_ipsec_esn_state { + u32 esn; + u8 trigger: 1; + u8 overlap: 1; +}; + +struct mlx5e_ipsec_sa_entry { + struct hlist_node hlist; /* Item in SADB_RX hashtable */ + struct mlx5e_ipsec_esn_state esn_state; + unsigned int handle; /* Handle in SADB_RX */ + struct xfrm_state *x; + struct mlx5e_ipsec *ipsec; + struct mlx5_accel_esp_xfrm *xfrm; + void *hw_context; + void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); }; void mlx5e_ipsec_build_inverse_table(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 6a7c8b04447e..c245d8e78509 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -42,10 +42,11 @@ enum { MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11, MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12, + MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17, }; struct mlx5e_ipsec_rx_metadata { - unsigned char reserved; + unsigned char nexthdr; __be32 sa_handle; } __packed; @@ -175,7 +176,30 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, } } -static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo) +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + __u32 oseq = replay_esn->oseq; + int iv_offset; + __be64 seqno; + u32 seq_hi; + + if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID && + MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) { + seq_hi = xo->seq.hi - 1; + } else { + seq_hi = xo->seq.hi; + } + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) { int iv_offset; __be64 seqno; @@ -227,6 +251,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct xfrm_offload *xo = xfrm_offload(skb); struct mlx5e_ipsec_metadata *mdata; + struct mlx5e_ipsec_sa_entry *sa_entry; struct xfrm_state *x; if (!xo) @@ -261,7 +286,8 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, goto drop; } mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo); - mlx5e_ipsec_set_iv(skb, xo); + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + sa_entry->set_iv_op(skb, x, xo); mlx5e_ipsec_set_metadata(skb, mdata, xo); return skb; @@ -301,10 +327,17 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, switch (mdata->syndrome) { case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED: xo->status = CRYPTO_SUCCESS; + if (likely(priv->ipsec->no_trailer)) { + xo->flags |= XFRM_ESP_NO_TRAILER; + xo->proto = mdata->content.rx.nexthdr; + } break; case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED: xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; break; + case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO: + xo->status = CRYPTO_INVALID_PROTOCOL; + break; default: atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index e37ae2598dbb..2bfbbef1b054 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -37,6 +37,7 @@ #ifdef CONFIG_MLX5_EN_IPSEC #include <linux/skbuff.h> +#include <net/xfrm.h> #include "en.h" struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, @@ -46,6 +47,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_ipsec_inverse_table_init(void); bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features); +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_tx_wqe *wqe, struct sk_buff *skb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 59ebfdae6695..37fd0245b6c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -203,9 +203,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, { int i, idx = 0; - if (!data) - return; - mutex_lock(&priv->state_lock); mlx5e_update_stats(priv); mutex_unlock(&priv->state_lock); @@ -223,60 +220,12 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mlx5e_ethtool_get_ethtool_stats(priv, stats, data); } -static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, - int num_wqe) -{ - int packets_per_wqe; - int stride_size; - int num_strides; - int wqe_size; - - if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - return num_wqe; - - stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; - num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; - wqe_size = stride_size * num_strides; - - packets_per_wqe = wqe_size / - ALIGN(ETH_DATA_LEN, stride_size); - return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1)); -} - -static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, - int num_packets) -{ - int packets_per_wqe; - int stride_size; - int num_strides; - int wqe_size; - int num_wqes; - - if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - return num_packets; - - stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; - num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; - wqe_size = stride_size * num_strides; - - num_packets = (1 << order_base_2(num_packets)); - - packets_per_wqe = wqe_size / - ALIGN(ETH_DATA_LEN, stride_size); - num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe); - return 1 << (order_base_2(num_wqes)); -} - void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param) { - int rq_wq_type = priv->channels.params.rq_wq_type; - - param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, - 1 << mlx5_max_log_rq_size(rq_wq_type)); + param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; - param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, - 1 << priv->channels.params.log_rq_size); + param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; param->tx_pending = 1 << priv->channels.params.log_sq_size; } @@ -291,13 +240,9 @@ static void mlx5e_get_ringparam(struct net_device *dev, int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param) { - int rq_wq_type = priv->channels.params.rq_wq_type; struct mlx5e_channels new_channels = {}; - u32 rx_pending_wqes; - u32 min_rq_size; u8 log_rq_size; u8 log_sq_size; - u32 num_mtts; int err = 0; if (param->rx_jumbo_pending) { @@ -311,23 +256,10 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, return -EINVAL; } - min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, - 1 << mlx5_min_log_rq_size(rq_wq_type)); - rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type, - param->rx_pending); - - if (param->rx_pending < min_rq_size) { + if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - min_rq_size); - return -EINVAL; - } - - num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes); - if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && - !MLX5E_VALID_NUM_MTTS(num_mtts)) { - netdev_info(priv->netdev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", - __func__, param->rx_pending); + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); return -EINVAL; } @@ -338,17 +270,17 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, return -EINVAL; } - log_rq_size = order_base_2(rx_pending_wqes); + log_rq_size = order_base_2(param->rx_pending); log_sq_size = order_base_2(param->tx_pending); - if (log_rq_size == priv->channels.params.log_rq_size && + if (log_rq_size == priv->channels.params.log_rq_mtu_frames && log_sq_size == priv->channels.params.log_sq_size) return 0; mutex_lock(&priv->state_lock); new_channels.params = priv->channels.params; - new_channels.params.log_rq_size = log_rq_size; + new_channels.params.log_rq_mtu_frames = log_rq_size; new_channels.params.log_sq_size = log_sq_size; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { @@ -1083,16 +1015,66 @@ static int mlx5e_get_rxnfc(struct net_device *netdev, return err; } +#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 +#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000 +#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85 +#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80 +#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \ + max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \ + (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100) + +static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev, + u16 *pfc_prevention_tout) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL); +} + +static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev, + u16 pfc_preven) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 critical_tout; + u16 minor; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ? + MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC : + pfc_preven; + + if (critical_tout != PFC_STORM_PREVENTION_DISABLE && + (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC || + critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) { + netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n", + __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, + MLX5E_PFC_PREVEN_TOUT_MAX_MSEC); + return -EINVAL; + } + + minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout); + return mlx5_set_port_stall_watermark(mdev, critical_tout, + minor); +} + static int mlx5e_get_tunable(struct net_device *dev, const struct ethtool_tunable *tuna, void *data) { - const struct mlx5e_priv *priv = netdev_priv(dev); - int err = 0; + int err; switch (tuna->id) { - case ETHTOOL_TX_COPYBREAK: - *(u32 *)data = priv->channels.params.tx_max_inline; + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_get_pfc_prevention_tout(dev, data); break; default: err = -EINVAL; @@ -1107,34 +1089,13 @@ static int mlx5e_set_tunable(struct net_device *dev, const void *data) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; - int err = 0; - u32 val; + int err; mutex_lock(&priv->state_lock); switch (tuna->id) { - case ETHTOOL_TX_COPYBREAK: - val = *(u32 *)data; - if (val > mlx5e_get_max_inline_cap(mdev)) { - err = -EINVAL; - break; - } - - new_channels.params = priv->channels.params; - new_channels.params.tx_max_inline = val; - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - break; - } - - err = mlx5e_open_channels(priv, &new_channels); - if (err) - break; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data); break; default: err = -EINVAL; @@ -1524,11 +1485,6 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); - new_channels.params.mpwqe_log_stride_sz = - MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val); - new_channels.params.mpwqe_log_num_strides = - MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; return 0; @@ -1566,6 +1522,38 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, return 0; } +static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err; + + if (enable) { + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return -EOPNOTSUPP; + if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params)) + return -EINVAL; + } + + new_channels.params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); + mlx5e_set_rq_type(mdev, &new_channels.params); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + static int mlx5e_handle_pflag(struct net_device *netdev, u32 wanted_flags, enum mlx5e_priv_flag flag, @@ -1611,6 +1599,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_RX_CQE_COMPRESS, set_pflag_rx_cqe_compress); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_STRIDING_RQ, + set_pflag_rx_striding_rq); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9b4827d36e3e..0aab3afc6885 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -71,56 +71,145 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { - return MLX5_CAP_GEN(mdev, striding_rq) && + bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && MLX5_CAP_ETH(mdev, reg_umr_sq); + u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq); + bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap; + + if (!striding_rq_umr) + return false; + if (!inline_umr) { + mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n", + (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap); + return false; + } + return true; +} + +static u32 mlx5e_mpwqe_get_linear_frag_sz(struct mlx5e_params *params) +{ + if (!params->xdp_prog) { + u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + u16 rq_headroom = MLX5_RX_HEADROOM + NET_IP_ALIGN; + + return MLX5_SKB_FRAG_SZ(rq_headroom + hw_mtu); + } + + return PAGE_SIZE; +} + +static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params) +{ + u32 linear_frag_sz = mlx5e_mpwqe_get_linear_frag_sz(params); + + return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); +} + +static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u32 frag_sz = mlx5e_mpwqe_get_linear_frag_sz(params); + s8 signed_log_num_strides_param; + u8 log_num_strides; + + if (params->lro_en || frag_sz > PAGE_SIZE) + return false; + + if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) + return true; + + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz); + signed_log_num_strides_param = + (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; + + return signed_log_num_strides_param >= 0; +} + +static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) +{ + if (params->log_rq_mtu_frames < + mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW) + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + + return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params); +} + +static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + return order_base_2(mlx5e_mpwqe_get_linear_frag_sz(params)); + + return MLX5E_MPWQE_STRIDE_SZ(mdev, + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); +} + +static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return MLX5_MPWRQ_LOG_WQE_SZ - + mlx5e_mpwqe_get_log_stride_size(mdev, params); +} + +static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u16 linear_rq_headroom = params->xdp_prog ? + XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; + + linear_rq_headroom += NET_IP_ALIGN; + + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST) + return linear_rq_headroom; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + return linear_rq_headroom; + + return 0; } void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u8 rq_type) + struct mlx5e_params *params) { - params->rq_wq_type = rq_type; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - params->log_rq_size = is_kdump_kernel() ? - MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev, - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); - params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - params->mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - params->log_rq_size = is_kdump_kernel() ? - MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - params->rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - params->rq_headroom += NET_IP_ALIGN; - /* Extra room needed for build_skb */ - params->lro_wqe_sz -= params->rq_headroom + + params->lro_wqe_sz -= mlx5e_get_rq_headroom(mdev, params) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(params->log_rq_size), - BIT(params->mpwqe_log_stride_sz), + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? + BIT(mlx5e_mpwqe_get_log_rq_size(params)) : + BIT(params->log_rq_mtu_frames), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && - !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; - mlx5e_init_rq_type_params(mdev, params, rq_type); + return mlx5e_check_fragmented_striding_rq_cap(mdev) && + !MLX5_IPSEC_DEV(mdev) && + !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params)); +} + +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; } static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -153,26 +242,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -static void mlx5e_tx_timeout_work(struct work_struct *work) -{ - struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, - tx_timeout_work); - int err; - - rtnl_lock(); - mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); - if (err) - netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", - err); -unlock: - mutex_unlock(&priv->state_lock); - rtnl_unlock(); -} - void mlx5e_update_stats(struct mlx5e_priv *priv) { int i; @@ -235,107 +304,38 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); } -static inline int mlx5e_get_wqe_mtt_sz(void) -{ - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the mtt array, we allocate - * a little more. - */ - return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), - MLX5_UMR_MTT_ALIGNMENT); -} - static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_icosq *sq, - struct mlx5e_umr_wqe *wqe, - u16 ix) + struct mlx5e_umr_wqe *wqe) { struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; - struct mlx5_wqe_data_seg *dseg = &wqe->data; - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); - u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); + u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS); cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; cseg->imm = rq->mkey_be; - ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; ucseg->xlt_octowords = cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); - ucseg->bsf_octowords = - cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - - dseg->lkey = sq->mkey_be; - dseg->addr = cpu_to_be64(wi->umr.mtt_addr); } static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, struct mlx5e_channel *c) { int wq_sz = mlx5_wq_ll_get_size(&rq->wq); - int mtt_sz = mlx5e_get_wqe_mtt_sz(); - int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; - int i; rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->mpwqe.info) - goto err_out; - - /* We allocate more than mtt_sz as we will align the pointer */ - rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, - cpu_to_node(c->cpu)); - if (unlikely(!rq->mpwqe.mtt_no_align)) - goto err_free_wqe_info; - - for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; - - wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc, - MLX5_UMR_ALIGN); - wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, - PCI_DMA_TODEVICE); - if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr))) - goto err_unmap_mtts; + return -ENOMEM; - mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i); - } + mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe); return 0; - -err_unmap_mtts: - while (--i >= 0) { - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; - - dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, - PCI_DMA_TODEVICE); - } - kfree(rq->mpwqe.mtt_no_align); -err_free_wqe_info: - kfree(rq->mpwqe.info); - -err_out: - return -ENOMEM; -} - -static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) -{ - int wq_sz = mlx5_wq_ll_get_size(&rq->wq); - int mtt_sz = mlx5e_get_wqe_mtt_sz(); - int i; - - for (i = 0; i < wq_sz; i++) { - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; - - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, - PCI_DMA_TODEVICE); - } - kfree(rq->mpwqe.mtt_no_align); - kfree(rq->mpwqe.info); } static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, @@ -347,9 +347,6 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u32 *in; int err; - if (!MLX5E_VALID_NUM_MTTS(npages)) - return -EINVAL; - in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -382,6 +379,11 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey); } +static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) +{ + return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT; +} + static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_rq_param *rqp, @@ -415,6 +417,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { @@ -428,11 +431,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = params->rq_headroom; + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -450,8 +452,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz; - rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); + rq->mpwqe.skb_from_cqe_mpwrq = + mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ? + mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); + rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; @@ -490,7 +496,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, byte_count = params->lro_en ? params->lro_wqe_sz : - MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu); + MLX5E_SW2HW_MTU(params, params->sw_mtu); #ifdef CONFIG_MLX5_EN_IPSEC if (MLX5_IPSEC_DEV(mdev)) byte_count += MLX5E_METADATA_ETHER_LEN; @@ -510,9 +516,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT; + u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i); - wqe->data.addr = cpu_to_be64(dma_offset); + wqe->data.addr = cpu_to_be64(dma_offset + rq->buff.headroom); } wqe->data.byte_count = cpu_to_be32(byte_count); @@ -558,7 +564,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - mlx5e_rq_free_mpwqe_info(rq); + kfree(rq->mpwqe.info); mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ @@ -615,8 +621,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq, static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) { - struct mlx5e_channel *c = rq->channel; - struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *in; void *rqc; @@ -898,7 +903,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; int err; - sq->mkey_be = c->mkey_be; sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; @@ -953,6 +957,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } +static void mlx5e_sq_recover(struct work_struct *work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -970,8 +975,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->channel = c; sq->txq_ix = txq_ix; sq->uar_map = mdev->mlx5e_res.bfreg.map; - sq->max_inline = params->tx_max_inline; sq->min_inline_mode = params->tx_min_inline_mode; + INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); @@ -1038,6 +1043,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); @@ -1156,9 +1162,20 @@ err_free_txqsq: return err; } +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); @@ -1203,6 +1220,107 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static void mlx5e_sq_recover(struct work_struct *work) +{ + struct mlx5e_txqsq_recover *recover = + container_of(work, struct mlx5e_txqsq_recover, + recover_work); + struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, + recover); + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return; + } + + if (state != MLX5_RQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return; + } + + netif_tx_disable_queue(sq->txq); + + if (mlx5e_wait_for_sq_flush(sq)) + return; + + /* If the interval between two consecutive recovers per SQ is too + * short, don't recover to avoid infinite loop of ERR_CQE -> recover. + * If we reached this state, there is probably a bug that needs to be + * fixed. let's keep the queue close and let tx timeout cleanup. + */ + if (jiffies_to_msecs(jiffies - recover->last_recover) < + MLX5E_SQ_RECOVER_MIN_INTERVAL) { + netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", + sq->sqn); + return; + } + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. + */ + if (mlx5e_sq_to_ready(sq, state)) + return; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats.recover++; + recover->last_recover = jiffies; + mlx5e_activate_txqsq(sq); +} + static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, @@ -1743,39 +1861,47 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_rq_param *param) { + struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9); - MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6); + MLX5_SET(wq, wq, log_wqe_num_of_strides, + mlx5e_mpwqe_get_log_num_strides(mdev, params) - + MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + MLX5_SET(wq, wq, log_wqe_stride_size, + mlx5e_mpwqe_get_log_stride_size(mdev, params) - + MLX5_MPWQE_LOG_STRIDE_SZ_BASE); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); } MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); - MLX5_SET(wq, wq, log_wq_sz, params->log_rq_size); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); param->wq.linear = 1; } -static void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, +static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param) { + struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); } @@ -1816,15 +1942,17 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_cq_param *param) { + struct mlx5_core_dev *mdev = priv->mdev; void *cqc = param->cqc; u8 log_cq_size; switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides; + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + + mlx5e_mpwqe_get_log_num_strides(mdev, params); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - log_cq_size = params->log_rq_size; + log_cq_size = params->log_rq_mtu_frames; } MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); @@ -2375,10 +2503,10 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); } -static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) +static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 mtu) { - struct mlx5_core_dev *mdev = priv->mdev; - u16 hw_mtu = MLX5E_SW2HW_MTU(priv, mtu); + u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu); int err; err = mlx5_set_port_mtu(mdev, hw_mtu, 1); @@ -2390,9 +2518,9 @@ static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) return 0; } -static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) +static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 *mtu) { - struct mlx5_core_dev *mdev = priv->mdev; u16 hw_mtu = 0; int err; @@ -2400,25 +2528,27 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) if (err || !hw_mtu) /* fallback to port oper mtu */ mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); - *mtu = MLX5E_HW2SW_MTU(priv, hw_mtu); + *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); } static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) { + struct mlx5e_params *params = &priv->channels.params; struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; u16 mtu; int err; - err = mlx5e_set_mtu(priv, netdev->mtu); + err = mlx5e_set_mtu(mdev, params, params->sw_mtu); if (err) return err; - mlx5e_query_mtu(priv, &mtu); - if (mtu != netdev->mtu) + mlx5e_query_mtu(mdev, params, &mtu); + if (mtu != params->sw_mtu) netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", - __func__, mtu, netdev->mtu); + __func__, mtu, params->sw_mtu); - netdev->mtu = mtu; + params->sw_mtu = mtu; return 0; } @@ -2646,15 +2776,16 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, return mlx5e_alloc_cq_common(mdev, param, cq); } -static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, +static int mlx5e_open_drop_rq(struct mlx5e_priv *priv, struct mlx5e_rq *drop_rq) { + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_cq_param cq_param = {}; struct mlx5e_rq_param rq_param = {}; struct mlx5e_cq *cq = &drop_rq->cq; int err; - mlx5e_build_drop_rq_param(mdev, &rq_param); + mlx5e_build_drop_rq_param(priv, &rq_param); err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); if (err) @@ -2672,6 +2803,10 @@ static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, if (err) goto err_free_rq; + err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err); + return 0; err_free_rq: @@ -3097,20 +3232,28 @@ typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; + struct mlx5e_params *old_params; int err = 0; bool reset; mutex_lock(&priv->state_lock); - reset = (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST); - reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); + old_params = &priv->channels.params; + reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - new_channels.params = priv->channels.params; + new_channels.params = *old_params; new_channels.params.lro_en = enable; + if (old_params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) { + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) + reset = false; + } + if (!reset) { - priv->channels.params = new_channels.params; + *old_params = new_channels.params; err = mlx5e_modify_tirs_lro(priv); goto out; } @@ -3239,24 +3382,20 @@ static int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t oper_features = netdev->features; - int err; + int err = 0; + +#define MLX5E_HANDLE_FEATURE(feature, handler) \ + mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) - err = mlx5e_handle_feature(netdev, &oper_features, features, - NETIF_F_LRO, set_feature_lro); - err |= mlx5e_handle_feature(netdev, &oper_features, features, - NETIF_F_HW_VLAN_CTAG_FILTER, + err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); - err |= mlx5e_handle_feature(netdev, &oper_features, features, - NETIF_F_HW_TC, set_feature_tc_num_filters); - err |= mlx5e_handle_feature(netdev, &oper_features, features, - NETIF_F_RXALL, set_feature_rx_all); - err |= mlx5e_handle_feature(netdev, &oper_features, features, - NETIF_F_RXFCS, set_feature_rx_fcs); - err |= mlx5e_handle_feature(netdev, &oper_features, features, - NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); #ifdef CONFIG_RFS_ACCEL - err |= mlx5e_handle_feature(netdev, &oper_features, features, - NETIF_F_NTUPLE, set_feature_arfs); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs); #endif if (err) { @@ -3290,34 +3429,40 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels new_channels = {}; - int curr_mtu; + struct mlx5e_params *params; int err = 0; bool reset; mutex_lock(&priv->state_lock); - reset = !priv->channels.params.lro_en && - (priv->channels.params.rq_wq_type != - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + params = &priv->channels.params; + reset = !params->lro_en; reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); - curr_mtu = netdev->mtu; - netdev->mtu = new_mtu; + new_channels.params = *params; + new_channels.params.sw_mtu = new_mtu; + + if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) { + u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); + + reset = reset && (ppw_old != ppw_new); + } if (!reset) { + params->sw_mtu = new_mtu; mlx5e_set_dev_port_mtu(priv); + netdev->mtu = params->sw_mtu; goto out; } - new_channels.params = priv->channels.params; err = mlx5e_open_channels(priv, &new_channels); - if (err) { - netdev->mtu = curr_mtu; + if (err) goto out; - } mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_set_dev_port_mtu); + netdev->mtu = new_channels.params.sw_mtu; out: mutex_unlock(&priv->state_lock); @@ -3607,21 +3752,11 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, struct mlx5e_txqsq *sq) { - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_core_dev *mdev = priv->mdev; - int irqn_not_used, eqn; - struct mlx5_eq *eq; + struct mlx5_eq *eq = sq->cq.mcq.eq; u32 eqe_count; - if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn, &irqn_not_used)) - return false; - - eq = mlx5_eqn2eq(mdev, eqn); - if (IS_ERR(eq)) - return false; - netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eqn, eq->cons_index, eq->irqn); + eq->eqn, eq->cons_index, eq->irqn); eqe_count = mlx5_eq_poll_irq_disabled(eq); if (!eqe_count) @@ -3632,13 +3767,19 @@ static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, return true; } -static void mlx5e_tx_timeout(struct net_device *dev) +static void mlx5e_tx_timeout_work(struct work_struct *work) { - struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + struct net_device *dev = priv->netdev; bool reopen_channels = false; - int i; + int i, err; - netdev_err(dev, "TX timeout detected\n"); + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); @@ -3646,7 +3787,9 @@ static void mlx5e_tx_timeout(struct net_device *dev) if (!netif_xmit_stopped(dev_queue)) continue; - netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + + netdev_err(dev, + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - dev_queue->trans_start)); @@ -3659,8 +3802,27 @@ static void mlx5e_tx_timeout(struct net_device *dev) } } - if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state)) - schedule_work(&priv->tx_timeout_work); + if (!reopen_channels) + goto unlock; + + mlx5e_close_locked(dev); + err = mlx5e_open_locked(dev); + if (err) + netdev_err(priv->netdev, + "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); + +unlock: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); +} + +static void mlx5e_tx_timeout(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + netdev_err(dev, "TX timeout detected\n"); + queue_work(priv->wq, &priv->tx_timeout_work); } static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) @@ -3710,7 +3872,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) bpf_prog_put(old_prog); if (reset) /* change RQ type according to priv->xdp_prog */ - mlx5e_set_rq_params(priv->mdev, &priv->channels.params); + mlx5e_set_rq_type(priv->mdev, &priv->channels.params); if (was_opened && reset) mlx5e_open_locked(netdev); @@ -3855,15 +4017,6 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) return 0; } -u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) -{ - int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; - - return bf_buf_size - - sizeof(struct mlx5e_tx_wqe) + - 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; -} - void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels) { @@ -3903,16 +4056,20 @@ static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) return 0; } -static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) +static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) { - return (link_speed && pci_bw && - (pci_bw < 40000) && (pci_bw < link_speed)); -} + u32 link_speed = 0; + u32 pci_bw = 0; -static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) -{ - return !(link_speed && pci_bw && - (pci_bw <= 16000) && (pci_bw < link_speed)); + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + +#define MLX5E_SLOW_PCI_RATIO (2) + + return link_speed && pci_bw && + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -3964,7 +4121,7 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) MLX5_CQ_PERIOD_MODE_START_FROM_CQE); } -u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -3978,20 +4135,15 @@ u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - u16 max_channels) + u16 max_channels, u16 mtu) { - u8 cq_period_mode = 0; - u32 link_speed = 0; - u32 pci_bw = 0; + u8 rx_cq_period_mode; + params->sw_mtu = mtu; + params->hard_mtu = MLX5E_ETH_HARD_MTU; params->num_channels = max_channels; params->num_tc = 1; - mlx5e_get_max_linkspeed(mdev, &link_speed); - mlx5e_get_pci_bw(mdev, &pci_bw); - mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - /* SQ */ params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@ -4001,30 +4153,34 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && MLX5_CAP_GEN(mdev, vport_group_manager)) - params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); + params->rx_cqe_compress_def = slow_pci_heuristic(mdev); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); /* RQ */ - mlx5e_set_rq_params(mdev, params); + if (mlx5e_striding_rq_possible(mdev, params)) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, + !slow_pci_heuristic(mdev)); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); /* HW LRO */ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - params->lro_en = hw_lro_heuristic(link_speed, pci_bw); + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + params->lro_en = !slow_pci_heuristic(mdev); params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ - cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - mlx5e_set_tx_cq_mode_params(params, cq_period_mode); + mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); + mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); /* TX inline */ - params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ @@ -4046,9 +4202,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->profile = profile; priv->ppriv = ppriv; priv->msglevel = MLX5E_MSG_LEVEL; - priv->hard_mtu = MLX5E_ETH_HARD_MTU; - mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); + mlx5e_build_nic_params(mdev, &priv->channels.params, + profile->max_nch(mdev), netdev->mtu); mutex_init(&priv->state_lock); @@ -4107,6 +4263,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->vlan_features |= NETIF_F_RXCSUM; netdev->vlan_features |= NETIF_F_RXHASH; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; + if (!!MLX5_CAP_ETH(mdev, lro_cap)) netdev->vlan_features |= NETIF_F_LRO; @@ -4186,7 +4345,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_ipsec_build_netdev(priv); } -static void mlx5e_create_q_counter(struct mlx5e_priv *priv) +static void mlx5e_create_q_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err; @@ -4196,14 +4355,21 @@ static void mlx5e_create_q_counter(struct mlx5e_priv *priv) mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err); priv->q_counter = 0; } + + err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter); + if (err) { + mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err); + priv->drop_rq_q_counter = 0; + } } -static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) +static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) { - if (!priv->q_counter) - return; + if (priv->q_counter) + mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); - mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); + if (priv->drop_rq_q_counter) + mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter); } static void mlx5e_nic_init(struct mlx5_core_dev *mdev, @@ -4315,7 +4481,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(priv, max_mtu); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); mlx5e_set_dev_port_mtu(priv); mlx5_lag_add(mdev, netdev); @@ -4436,18 +4602,18 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) if (err) goto out; - err = mlx5e_open_drop_rq(mdev, &priv->drop_rq); + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - goto err_cleanup_tx; + goto err_destroy_q_counters; } err = profile->init_rx(priv); if (err) goto err_close_drop_rq; - mlx5e_create_q_counter(priv); - if (profile->enable) profile->enable(priv); @@ -4456,7 +4622,8 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); -err_cleanup_tx: +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); profile->cleanup_tx(priv); out: @@ -4473,9 +4640,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) profile->disable(priv); flush_workqueue(priv->wq); - mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); profile->cleanup_tx(priv); cancel_delayed_work_sync(&priv->update_stats_work); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 500d817d2b0a..d8f68e4d1018 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -883,14 +883,14 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params->hard_mtu = MLX5E_ETH_HARD_MTU; params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; - params->log_rq_size = MLX5E_REP_PARAMS_LOG_RQ_SIZE; + params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE; params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->num_tc = 1; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; @@ -931,8 +931,6 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, priv->channels.params.num_channels = profile->max_nch(mdev); - priv->hard_mtu = MLX5E_ETH_HARD_MTU; - mlx5e_build_rep_params(mdev, &priv->channels.params); mlx5e_build_rep_netdev(netdev); @@ -1160,6 +1158,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) kfree(ppriv); /* mlx5e_rep_priv */ } +static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + + rpriv = mlx5e_rep_to_rep_priv(rep); + + return rpriv->netdev; +} + static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1172,6 +1179,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) rep_if.load = mlx5e_vport_rep_load; rep_if.unload = mlx5e_vport_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); } } @@ -1199,6 +1207,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv) rep_if.load = mlx5e_nic_rep_load; rep_if.unload = mlx5e_nic_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; rep_if.priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e5c3ab46a24a..176645762e49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -53,7 +53,7 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, void *data) { - u32 ci = cqcc & cq->wq.sz_m1; + u32 ci = cqcc & cq->wq.fbc.sz_m1; memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64)); } @@ -75,9 +75,10 @@ static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc) static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) { - u8 op_own = (cqcc >> cq->wq.log_sz) & 1; - u32 wq_sz = 1 << cq->wq.log_sz; - u32 ci = cqcc & cq->wq.sz_m1; + struct mlx5_frag_buf_ctrl *fbc = &cq->wq.fbc; + u8 op_own = (cqcc >> fbc->log_sz) & 1; + u32 wq_sz = 1 << fbc->log_sz; + u32 ci = cqcc & fbc->sz_m1; u32 ci_top = min_t(u32, wq_sz, ci + n); for (; ci < ci_top; ci++, n--) { @@ -102,7 +103,7 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; cq->title.op_own &= 0xf0; - cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz); + cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz); cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) @@ -295,46 +296,36 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_free_rx_wqe(rq, wi); } -static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) -{ - return rq->mpwqe.num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; -} - static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) + struct mlx5e_dma_info *di, + u32 frag_offset, u32 len) { unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz)); dma_sync_single_for_cpu(rq->pdev, - wi->umr.dma_info[page_idx].addr + frag_offset, + di->addr + frag_offset, len, DMA_FROM_DEVICE); - wi->skbs_frags[page_idx]++; + page_ref_inc(di->page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - wi->umr.dma_info[page_idx].page, frag_offset, - len, truesize); + di->page, frag_offset, len, truesize); } static inline void mlx5e_copy_skb_header_mpwqe(struct device *pdev, struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) + struct mlx5e_dma_info *dma_info, + u32 offset, u32 headlen) { u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; unsigned int len; /* Aligning len to sizeof(long) optimizes memcpy performance */ len = ALIGN(headlen_pg, sizeof(long)); dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data_offset(skb, 0, - page_address(dma_info->page) + offset, - len); + skb_copy_to_linear_data(skb, page_address(dma_info->page) + offset, len); + if (unlikely(offset + headlen > PAGE_SIZE)) { dma_info++; headlen_pg = len; @@ -347,14 +338,49 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, } } -static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) +{ + const bool no_xdp_xmit = + bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + struct mlx5e_dma_info *dma_info = wi->umr.dma_info; + int i; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) + if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) + mlx5e_page_release(rq, &dma_info[i], true); +} + +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + + rq->mpwqe.umr_in_progress = false; + + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) +{ + return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; +} + +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; - struct mlx5e_umr_wqe *wqe; - u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); + struct mlx5e_umr_wqe *umr_wqe; + u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); + int err; u16 pi; + int i; /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { @@ -362,90 +388,44 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_post_nop(wq, sq->sqn, &sq->pc); } - wqe = mlx5_wq_cyc_get_wqe(wq, pi); - memcpy(wqe, &wi->umr.wqe, sizeof(*wqe)); - wqe->ctrl.opmod_idx_opcode = - cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | - MLX5_OPCODE_UMR); - - sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; - sq->pc += num_wqebbs; - mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); -} - -static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, - u16 ix) -{ - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - int pg_strides = mlx5e_mpwqe_strides_per_page(rq); - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; - int err; - int i; + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2)) + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, + offsetof(struct mlx5e_umr_wqe, inline_mtts)); for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { err = mlx5e_page_alloc_mapped(rq, dma_info); if (unlikely(err)) goto err_unmap; - wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); - page_ref_add(dma_info->page, pg_strides); + umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); } - memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE); + bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); wi->consumed_strides = 0; + rq->mpwqe.umr_in_progress = true; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); + + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->pc += MLX5E_UMR_WQEBBS; + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &umr_wqe->ctrl); + return 0; err_unmap: while (--i >= 0) { dma_info--; - page_ref_sub(dma_info->page, pg_strides); mlx5e_page_release(rq, dma_info, true); } + rq->stats.buff_alloc_err++; return err; } -void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) -{ - int pg_strides = mlx5e_mpwqe_strides_per_page(rq); - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; - int i; - - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { - page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); - mlx5e_page_release(rq, dma_info, true); - } -} - -static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) -{ - struct mlx5_wq_ll *wq = &rq->wq; - struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - - rq->mpwqe.umr_in_progress = false; - - mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - - /* ensure wqes are visible to device before updating doorbell record */ - dma_wmb(); - - mlx5_wq_ll_update_db_record(wq); -} - -static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) -{ - int err; - - err = mlx5e_alloc_rx_umr_mpwqe(rq, ix); - if (unlikely(err)) { - rq->stats.buff_alloc_err++; - return err; - } - rq->mpwqe.umr_in_progress = true; - mlx5e_post_umr_wqe(rq, ix); - return 0; -} - void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; @@ -544,7 +524,7 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) if (!rq->mpwqe.umr_in_progress) mlx5e_alloc_rx_mpwqe(rq, wq->head); - return true; + return false; } static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) @@ -766,8 +746,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, prefetchw(wqe); - if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || - MLX5E_SW2HW_MTU(rq->channel->priv, rq->netdev->mtu) < dma_len)) { + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || rq->hw_mtu < dma_len)) { rq->stats.xdp_drop++; return false; } @@ -806,7 +785,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, /* move page to reference to sq responsibility, * and mark so it's not put back in page-cache. */ - rq->wqe.xdp_xmit = true; + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ sq->db.di[pi] = *di; sq->pc++; @@ -855,6 +834,24 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, } static inline +struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, + u32 frag_size, u16 headroom, + u32 cqe_bcnt) +{ + struct sk_buff *skb = build_skb(va, frag_size); + + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + return NULL; + } + + skb_reserve(skb, headroom); + skb_put(skb, cqe_bcnt); + + return skb; +} + +static inline struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { @@ -869,10 +866,8 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - dma_sync_single_range_for_cpu(rq->pdev, - di->addr + wi->offset, - 0, frag_size, - DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, + frag_size, DMA_FROM_DEVICE); prefetch(data); wi->offset += frag_size; @@ -887,18 +882,13 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, if (consumed) return NULL; /* page/packet was consumed by XDP */ - skb = build_skb(va, frag_size); - if (unlikely(!skb)) { - rq->stats.buff_alloc_err++; + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt); + if (unlikely(!skb)) return NULL; - } /* queue up for recycling/reuse */ page_ref_inc(di->page); - skb_reserve(skb, rx_headroom); - skb_put(skb, cqe_bcnt); - return skb; } @@ -920,9 +910,8 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ - if (rq->wqe.xdp_xmit) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { wi->di.page = NULL; - rq->wqe.xdp_xmit = false; /* do not return page to cache, it will be returned on XDP_TX completion */ goto wq_ll_pop; } @@ -962,9 +951,8 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) { - if (rq->wqe.xdp_xmit) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { wi->di.page = NULL; - rq->wqe.xdp_xmit = false; /* do not return page to cache, it will be returned on XDP_TX completion */ goto wq_ll_pop; } @@ -987,23 +975,28 @@ wq_ll_pop: } #endif -static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - struct mlx5e_mpw_info *wi, - u32 cqe_bcnt, - struct sk_buff *skb) +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); - u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; - u32 head_offset = wqe_offset & (PAGE_SIZE - 1); - u32 page_idx = wqe_offset >> PAGE_SHIFT; - u32 head_page_idx = page_idx; u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; u32 frag_offset = head_offset + headlen; - u16 byte_cnt = cqe_bcnt - headlen; + u32 byte_cnt = cqe_bcnt - headlen; + struct mlx5e_dma_info *head_di = di; + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, sizeof(long))); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + return NULL; + } + + prefetchw(skb->data); if (unlikely(frag_offset >= PAGE_SIZE)) { - page_idx++; + di++; frag_offset -= PAGE_SIZE; } @@ -1011,18 +1004,59 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset, + mlx5e_add_skb_frag_mpwqe(rq, skb, di, frag_offset, pg_consumed_bytes); byte_cnt -= pg_consumed_bytes; frag_offset = 0; - page_idx++; + di++; } /* copy header */ - mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx, + mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; + + return skb; +} + +struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, u32 head_offset, u32 page_idx) +{ + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + u16 rx_headroom = rq->buff.headroom; + u32 cqe_bcnt32 = cqe_bcnt; + struct sk_buff *skb; + void *va, *data; + u32 frag_size; + bool consumed; + + va = page_address(di->page) + head_offset; + data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); + + dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset, + frag_size, DMA_FROM_DEVICE); + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32); + rcu_read_unlock(); + if (consumed) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32); + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + page_ref_inc(di->page); + + return skb; } void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) @@ -1030,7 +1064,11 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; - struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; u16 cqe_bcnt; @@ -1046,18 +1084,13 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto mpwrq_cqe_out; } - skb = napi_alloc_skb(rq->cq.napi, - ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, - sizeof(long))); - if (unlikely(!skb)) { - rq->stats.buff_alloc_err++; - goto mpwrq_cqe_out; - } - - prefetchw(skb->data); cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); + skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset, + page_idx); + if (!skb) + goto mpwrq_cqe_out; + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); @@ -1065,6 +1098,7 @@ mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) return; + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); mlx5e_free_rx_mpwqe(rq, wi); mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 5f0f3493d747..b08c94422907 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -60,6 +60,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, @@ -153,6 +155,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; + s->tx_cqe_err += sq_stats->cqe_err; + s->tx_recover += sq_stats->recover; s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; s->tx_csum_none += sq_stats->csum_none; @@ -170,11 +174,24 @@ static const struct counter_desc q_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, }; +static const struct counter_desc drop_rq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) }, +}; + #define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc) static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv) { - return priv->q_counter ? NUM_Q_COUNTERS : 0; + int num_stats = 0; + + if (priv->q_counter) + num_stats += NUM_Q_COUNTERS; + + if (priv->drop_rq_q_counter) + num_stats += NUM_DROP_RQ_COUNTERS; + + return num_stats; } static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) @@ -182,7 +199,13 @@ static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) int i; for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); + strcpy(data + (idx++) * ETH_GSTRING_LEN, + q_stats_desc[i].format); + + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + drop_rq_stats_desc[i].format); + return idx; } @@ -191,7 +214,11 @@ static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) int i; for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) - data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, q_stats_desc, i); + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + drop_rq_stats_desc, i); return idx; } @@ -199,16 +226,76 @@ static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) { struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; - int err; - if (!priv->q_counter) - return; + if (priv->q_counter && + !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, + sizeof(out))) + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, + out, out_of_buffer); + if (priv->drop_rq_q_counter && + !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0, + out, sizeof(out))) + qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out, + out_of_buffer); +} + +#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c) +static const struct counter_desc vnic_env_stats_desc[] = { + { "rx_steer_missed_packets", + VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) }, +}; + +#define NUM_VNIC_ENV_COUNTERS ARRAY_SIZE(vnic_env_stats_desc) + +static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv) +{ + return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ? + NUM_VNIC_ENV_COUNTERS : 0; +} + +static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + return idx; + + for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + return idx; - err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out)); - if (err) + for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_desc, i); + return idx; +} + +static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv) +{ + u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out; + int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out); + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0}; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) return; - qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); + MLX5_SET(query_vnic_env_in, in, opcode, + MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, op_mod, 0); + MLX5_SET(query_vnic_env_in, in, other_vport, 0); + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } #define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) @@ -754,7 +841,15 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; +static const struct counter_desc pport_pfc_stall_stats_desc[] = { + { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, + { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) }, +}; + #define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \ + MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) { @@ -790,7 +885,8 @@ static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv) { return (mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * - NUM_PPORT_PER_PRIO_PFC_COUNTERS; + NUM_PPORT_PER_PRIO_PFC_COUNTERS + + NUM_PPORT_PFC_STALL_COUNTERS(priv); } static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, @@ -818,6 +914,10 @@ static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, } } + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_pfc_stall_stats_desc[i].format); + return idx; } @@ -845,6 +945,10 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, } } + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_pfc_stall_stats_desc, i); + return idx; } @@ -1003,6 +1107,8 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) }, }; static const struct counter_desc ch_stats_desc[] = { @@ -1095,6 +1201,12 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .update_stats = mlx5e_grp_q_update_stats, }, { + .get_num_stats = mlx5e_grp_vnic_env_get_num_stats, + .fill_strings = mlx5e_grp_vnic_env_fill_strings, + .fill_stats = mlx5e_grp_vnic_env_fill_stats, + .update_stats = mlx5e_grp_vnic_env_update_stats, + }, + { .get_num_stats = mlx5e_grp_vport_get_num_stats, .fill_strings = mlx5e_grp_vport_fill_strings, .fill_stats = mlx5e_grp_vport_fill_stats, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 0b3320a2b072..53111a2df587 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -78,6 +78,8 @@ struct mlx5e_sw_stats { u64 tx_queue_wake; u64 tx_queue_dropped; u64 tx_xmit_more; + u64 tx_cqe_err; + u64 tx_recover; u64 rx_wqe_err; u64 rx_mpwqe_filler; u64 rx_buff_alloc_err; @@ -97,6 +99,11 @@ struct mlx5e_sw_stats { struct mlx5e_qcounter_stats { u32 rx_out_of_buffer; + u32 rx_if_down_packets; +}; + +struct mlx5e_vnic_env_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; }; #define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ @@ -192,6 +199,8 @@ struct mlx5e_sq_stats { u64 stopped; u64 wake; u64 dropped; + u64 cqe_err; + u64 recover; }; struct mlx5e_ch_stats { @@ -201,6 +210,7 @@ struct mlx5e_ch_stats { struct mlx5e_stats { struct mlx5e_sw_stats sw; struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vnic_env_stats vnic; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; struct rtnl_link_stats64 vf_vport; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 43234cabf444..4197001f9801 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -675,6 +675,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, + .has_flow_tag = true, .flow_tag = attr->flow_tag, .encap_id = 0, }; @@ -2529,12 +2530,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || - tcf_vlan_push_prio(a)) - return -EOPNOTSUPP; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - attr->vlan = tcf_vlan_push_vid(a); + attr->vlan_vid = tcf_vlan_push_vid(a); + if (mlx5_eswitch_vlan_actions_supported(priv->mdev)) { + attr->vlan_prio = tcf_vlan_push_prio(a); + attr->vlan_proto = tcf_vlan_push_proto(a); + if (!attr->vlan_proto) + attr->vlan_proto = htons(ETH_P_8021Q); + } else if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || + tcf_vlan_push_prio(a)) { + return -EOPNOTSUPP; + } } else { /* action is TCA_VLAN_ACT_MODIFY */ return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 11b4f1089d1c..20297108528a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -417,6 +417,18 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb, wqe, pi); } +static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, + struct mlx5_err_cqe *err_cqe) +{ + u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq); + + netdev_err(sq->channel->netdev, + "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome, + err_cqe->vendor_err_synd); + mlx5_dump_err_cqe(sq->cq.mdev, err_cqe); +} + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_txqsq *sq; @@ -456,6 +468,17 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); + if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, + &sq->state)) { + mlx5e_dump_error_cqe(sq, + (struct mlx5_err_cqe *)cqe); + queue_work(cq->channel->priv->wq, + &sq->recover.recover_work); + } + sq->stats.cqe_err++; + } + do { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; @@ -509,7 +532,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) { + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5E_SQ_STOP_ROOM) && + !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); sq->stats.wake++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 25106e996a96..c1c94974e16b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -393,6 +393,51 @@ static void general_event_handler(struct mlx5_core_dev *dev, } } +/* caller must eventually call mlx5_cq_put on the returned cq */ +static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *cq = NULL; + + spin_lock(&table->lock); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + mlx5_cq_hold(cq); + spin_unlock(&table->lock); + + return cq; +} + +static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn) +{ + struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); + + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + return; + } + + ++cq->arm_sn; + + cq->comp(cq); + + mlx5_cq_put(cq); +} + +static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) +{ + struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); + + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return; + } + + cq->event(cq, event_type); + + mlx5_cq_put(cq); +} + static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; @@ -415,7 +460,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) switch (eqe->type) { case MLX5_EVENT_TYPE_COMP: cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; - mlx5_cq_completion(dev, cqn); + mlx5_eq_cq_completion(eq, cqn); break; case MLX5_EVENT_TYPE_DCT_DRAINED: rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; @@ -472,7 +517,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", cqn, eqe->data.cq_err.syndrome); - mlx5_cq_event(dev, cqn, eqe->type); + mlx5_eq_cq_event(eq, cqn, eqe->type); break; case MLX5_EVENT_TYPE_PAGE_REQUEST: @@ -567,6 +612,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, enum mlx5_eq_type type) { + struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; irq_handler_t handler; @@ -576,6 +622,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, u32 *in; int err; + /* Init CQ table */ + memset(cq_table, 0, sizeof(*cq_table)); + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + eq->type = type; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; @@ -669,7 +720,6 @@ err_buf: mlx5_buf_free(dev, &eq->buf); return err; } -EXPORT_SYMBOL_GPL(mlx5_create_map_eq); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { @@ -696,7 +746,40 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) return err; } -EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + int err; + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, cq->cqn, cq); + spin_unlock_irq(&table->lock); + + return err; +} + +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *tmp; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, cq->cqn); + spin_unlock_irq(&table->lock); + + if (!tmp) { + mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); + return -ENOENT; + } + + if (tmp != cq) { + mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn); + return -EINVAL; + } + + return 0; +} int mlx5_eq_init(struct mlx5_core_dev *dev) { @@ -840,4 +923,3 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, MLX5_SET(query_eq_in, in, eq_number, eq->eqn); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -EXPORT_SYMBOL_GPL(mlx5_core_eq_query); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c2b1d7d351fc..332bc56306bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); esw->mode = mode; - if (mode == SRIOV_LEGACY) + if (mode == SRIOV_LEGACY) { err = esw_create_legacy_fdb_table(esw, nvfs + 1); - else + } else { + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + err = esw_offloads_init(esw, nvfs + 1); + } + if (err) goto abort; @@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw->mode = SRIOV_NONE; + + if (mode == SRIOV_OFFLOADS) + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + return err; } void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + int old_mode; int nvports; int i; @@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) else if (esw->mode == SRIOV_OFFLOADS) esw_offloads_cleanup(esw, nvports); + old_mode = esw->mode; esw->mode = SRIOV_NONE; + + if (old_mode == SRIOV_OFFLOADS) + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); } int mlx5_eswitch_init(struct mlx5_core_dev *dev) @@ -2083,17 +2096,19 @@ unlock: return err; } -static void mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, - int vport_idx, - struct mlx5_vport_drop_stats *stats) +static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, + int vport_idx, + struct mlx5_vport_drop_stats *stats) { struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_vport *vport = &esw->vports[vport_idx]; + u64 rx_discard_vport_down, tx_discard_vport_down; u64 bytes = 0; u16 idx = 0; + int err = 0; if (!vport->enabled || esw->mode != SRIOV_LEGACY) - return; + return 0; if (vport->egress.drop_counter) { idx = vport->egress.drop_counter->id; @@ -2104,6 +2119,23 @@ static void mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, idx = vport->ingress.drop_counter->id; mlx5_fc_query(dev, idx, &stats->tx_dropped, &bytes); } + + if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && + !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + return 0; + + err = mlx5_query_vport_down_stats(dev, vport_idx, + &rx_discard_vport_down, + &tx_discard_vport_down); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) + stats->rx_dropped += rx_discard_vport_down; + if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + stats->tx_dropped += tx_discard_vport_down; + + return 0; } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -2167,7 +2199,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, vf_stats->broadcast = MLX5_GET_CTR(out, received_eth_broadcast.packets); - mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats); + err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats); + if (err) + goto free_out; vf_stats->rx_dropped = stats.rx_dropped; vf_stats->tx_dropped = stats.tx_dropped; @@ -2175,3 +2209,9 @@ free_out: kvfree(out); return err; } + +u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) +{ + return esw->mode; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2fa037066b2f..4cd773fa55e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -37,19 +37,9 @@ #include <linux/if_link.h> #include <net/devlink.h> #include <linux/mlx5/device.h> +#include <linux/mlx5/eswitch.h> #include "lib/mpfs.h" -enum { - SRIOV_NONE, - SRIOV_LEGACY, - SRIOV_OFFLOADS -}; - -enum { - REP_ETH, - NUM_REP_TYPES, -}; - #ifdef CONFIG_MLX5_ESWITCH #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_table *fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; - struct mlx5_flow_handle *miss_rule; + struct mlx5_flow_handle *miss_rule_uni; + struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; } offloads; }; }; -struct mlx5_eswitch_rep; -struct mlx5_eswitch_rep_if { - int (*load)(struct mlx5_core_dev *dev, - struct mlx5_eswitch_rep *rep); - void (*unload)(struct mlx5_eswitch_rep *rep); - void *priv; - bool valid; -}; - -struct mlx5_eswitch_rep { - struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES]; - u16 vport; - u8 hw_id[ETH_ALEN]; - u16 vlan; - u32 vlan_refcount; -}; - struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; @@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); -struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, - u32 sqn); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); struct mlx5_flow_spec; @@ -256,15 +227,14 @@ enum { SET_VLAN_INSERT = BIT(1) }; -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x4000 -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000 - struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; int action; - u16 vlan; + __be16 vlan_proto; + u16 vlan_vid; + u8 vlan_prio; bool vlan_handled; u32 encap_id; u32 mod_hdr_id; @@ -278,13 +248,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *rep_if, - u8 rep_type); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - u8 rep_type); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, @@ -294,6 +257,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int vport, u16 vlan, u8 qos, u8 set_flags); +static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan); +} + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 99f583a15cc3..35e256eb2f6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -58,8 +58,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - /* per flow vlan pop/push is emulated, don't set that into the firmware */ - flow_act.action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + flow_act.action = attr->action; + /* if per flow vlan pop/push is emulated, don't set that into the firmware */ + if (!mlx5_eswitch_vlan_actions_supported(esw->dev)) + flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + flow_act.vlan.ethtype = ntohs(attr->vlan_proto); + flow_act.vlan.vid = attr->vlan_vid; + flow_act.vlan.prio = attr->vlan_prio; + } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -88,10 +96,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) flow_act.encap_id = attr->encap_id; rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, @@ -185,7 +193,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, /* protects against (1) setting rules with different vlans to push and * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0) */ - if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan)) + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid)) goto out_notsupp; return 0; @@ -202,6 +210,10 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, bool push, pop, fwd; int err = 0; + /* nop if we're on the vlan push/pop non emulation mode */ + if (mlx5_eswitch_vlan_actions_supported(esw->dev)) + return 0; + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); @@ -239,11 +251,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (vport->vlan_refcount) goto skip_set_push; - err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0, + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid, 0, SET_VLAN_INSERT | SET_VLAN_STRIP); if (err) goto out; - vport->vlan = attr->vlan; + vport->vlan = attr->vlan_vid; skip_set_push: vport->vlan_refcount++; } @@ -261,6 +273,10 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, bool push, pop, fwd; int err = 0; + /* nop if we're on the vlan push/pop non emulation mode */ + if (mlx5_eswitch_vlan_actions_supported(esw->dev)) + return 0; + if (!attr->vlan_handled) return 0; @@ -338,6 +354,7 @@ out: kvfree(spec); return flow_rule; } +EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) { @@ -350,7 +367,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; + void *headers_c; + void *headers_v; int err = 0; + u8 *dmac_c; + u8 *dmac_v; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -358,6 +379,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) goto out; } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, + outer_headers.dmac_47_16); + dmac_c[0] = 0x01; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; @@ -366,11 +394,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); - esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); goto out; } - esw->fdb_table.offloads.miss_rule = flow_rule; + esw->fdb_table.offloads.miss_rule_uni = flow_rule; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, + outer_headers.dmac_47_16); + dmac_v[0] = 0x01; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + goto out; + } + + esw->fdb_table.offloads.miss_rule_multi = flow_rule; + out: kvfree(spec); return err; @@ -426,6 +471,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) } #define MAX_PF_SQ 256 +#define MAX_SQ_NVPORTS 32 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { @@ -438,6 +484,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) struct mlx5_flow_group *g; void *match_criteria; u32 *flow_group_in; + u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); flow_group_in = kvzalloc(inlen, GFP_KERNEL); @@ -455,7 +502,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto fast_fdb_err; - table_size = nvports + MAX_PF_SQ + 1; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; ft_attr.max_fte = table_size; ft_attr.prio = FDB_SLOW_PATH; @@ -478,7 +525,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); - ix = nvports + MAX_PF_SQ; + ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); @@ -492,10 +539,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create miss group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -531,7 +584,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); - mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); @@ -789,14 +843,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; - /* disable PF RoCE so missed packets don't go through RoCE steering */ - mlx5_dev_list_lock(); - mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - err = esw_create_offloads_fdb_tables(esw, nvports); if (err) - goto create_fdb_err; + return err; err = esw_create_offloads_table(esw); if (err) @@ -821,12 +870,6 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); -create_fdb_err: - /* enable back PF RoCE */ - mlx5_dev_list_lock(); - mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - return err; } @@ -844,9 +887,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) } /* enable back PF RoCE */ - mlx5_dev_list_lock(); - mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); return err; } @@ -1160,10 +1201,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep_if->load = __rep_if->load; rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; rep_if->priv = __rep_if->priv; rep_if->valid = true; } +EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, int vport_index, u8 rep_type) @@ -1178,6 +1221,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, rep->rep_if[rep_type].valid = false; } +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { @@ -1188,3 +1232,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) rep = &offloads->vport_reps[UPLINK_REP_INDEX]; return rep->rep_if[rep_type].priv; } + +void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, + int vport, + u8 rep_type) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + if (vport == FDB_UPLINK_VPORT) + vport = UPLINK_REP_INDEX; + + rep = &offloads->vport_reps[vport]; + + if (rep->rep_if[rep_type].valid && + rep->rep_if[rep_type].get_proto_dev) + return rep->rep_if[rep_type].get_proto_dev(rep); + return NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); + +void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) +{ + return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); +} +EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); + +struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, + int vport) +{ + return &esw->offloads.vport_reps[vport]; +} +EXPORT_SYMBOL(mlx5_eswitch_vport_rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 35d0e33381ca..0f5da499a223 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -31,49 +31,91 @@ * */ +#include <linux/rhashtable.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/fs.h> +#include <linux/rbtree.h> #include "mlx5_core.h" +#include "fs_cmd.h" #include "fpga/ipsec.h" #include "fpga/sdk.h" #include "fpga/core.h" #define SBU_QP_QUEUE_SIZE 8 +#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC (60 * 1000) -enum mlx5_ipsec_response_syndrome { - MLX5_IPSEC_RESPONSE_SUCCESS = 0, - MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, - MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2, - MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, +enum mlx5_fpga_ipsec_cmd_status { + MLX5_FPGA_IPSEC_CMD_PENDING, + MLX5_FPGA_IPSEC_CMD_SEND_FAIL, + MLX5_FPGA_IPSEC_CMD_COMPLETE, }; -enum mlx5_fpga_ipsec_sacmd_status { - MLX5_FPGA_IPSEC_SACMD_PENDING, - MLX5_FPGA_IPSEC_SACMD_SEND_FAIL, - MLX5_FPGA_IPSEC_SACMD_COMPLETE, -}; - -struct mlx5_ipsec_command_context { +struct mlx5_fpga_ipsec_cmd_context { struct mlx5_fpga_dma_buf buf; - struct mlx5_accel_ipsec_sa sa; - enum mlx5_fpga_ipsec_sacmd_status status; + enum mlx5_fpga_ipsec_cmd_status status; + struct mlx5_ifc_fpga_ipsec_cmd_resp resp; int status_code; struct completion complete; struct mlx5_fpga_device *dev; struct list_head list; /* Item in pending_cmds */ + u8 command[0]; +}; + +struct mlx5_fpga_esp_xfrm; + +struct mlx5_fpga_ipsec_sa_ctx { + struct rhash_head hash; + struct mlx5_ifc_fpga_ipsec_sa hw_sa; + struct mlx5_core_dev *dev; + struct mlx5_fpga_esp_xfrm *fpga_xfrm; +}; + +struct mlx5_fpga_esp_xfrm { + unsigned int num_rules; + struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; + struct mutex lock; /* xfrm lock */ + struct mlx5_accel_esp_xfrm accel_xfrm; +}; + +struct mlx5_fpga_ipsec_rule { + struct rb_node node; + struct fs_fte *fte; + struct mlx5_fpga_ipsec_sa_ctx *ctx; }; -struct mlx5_ipsec_sadb_resp { - __be32 syndrome; - __be32 sw_sa_handle; - u8 reserved[24]; -} __packed; +static const struct rhashtable_params rhash_sa = { + .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa), + .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa), + .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash), + .automatic_shrinking = true, + .min_size = 1, +}; struct mlx5_fpga_ipsec { + struct mlx5_fpga_device *fdev; struct list_head pending_cmds; spinlock_t pending_cmds_lock; /* Protects pending_cmds */ u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)]; struct mlx5_fpga_conn *conn; + + struct notifier_block fs_notifier_ingress_bypass; + struct notifier_block fs_notifier_egress; + + /* Map hardware SA --> SA context + * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx) + * We will use this hash to avoid SAs duplication in fpga which + * aren't allowed + */ + struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */ + struct mutex sa_hash_lock; + + /* Tree holding all rules for this fpga device + * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id) + */ + struct rb_root rules_rb; + struct mutex rules_rb_lock; /* rules lock */ }; static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) @@ -97,28 +139,29 @@ static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn, struct mlx5_fpga_dma_buf *buf, u8 status) { - struct mlx5_ipsec_command_context *context; + struct mlx5_fpga_ipsec_cmd_context *context; if (status) { - context = container_of(buf, struct mlx5_ipsec_command_context, + context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context, buf); mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n", status); - context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL; + context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL; complete(&context->complete); } } -static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome) +static inline +int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome) { switch (syndrome) { - case MLX5_IPSEC_RESPONSE_SUCCESS: + case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS: return 0; - case MLX5_IPSEC_RESPONSE_SADB_ISSUE: + case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE: return -EEXIST; - case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST: + case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST: return -EINVAL; - case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: + case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: return -EIO; } return -EIO; @@ -126,9 +169,9 @@ static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome) static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) { - struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data; - struct mlx5_ipsec_command_context *context; - enum mlx5_ipsec_response_syndrome syndrome; + struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data; + struct mlx5_fpga_ipsec_cmd_context *context; + enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome; struct mlx5_fpga_device *fdev = cb_arg; unsigned long flags; @@ -138,12 +181,12 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) return; } - mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n", - ntohl(resp->syndrome), ntohl(resp->sw_sa_handle)); + mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n", + ntohl(resp->syndrome)); spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); context = list_first_entry_or_null(&fdev->ipsec->pending_cmds, - struct mlx5_ipsec_command_context, + struct mlx5_fpga_ipsec_cmd_context, list); if (context) list_del(&context->list); @@ -155,51 +198,48 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) } mlx5_fpga_dbg(fdev, "Handling response for %p\n", context); - if (context->sa.sw_sa_handle != resp->sw_sa_handle) { - mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n", - ntohl(context->sa.sw_sa_handle), - ntohl(resp->sw_sa_handle)); - return; - } - syndrome = ntohl(resp->syndrome); context->status_code = syndrome_to_errno(syndrome); - context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE; + context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE; + memcpy(&context->resp, resp, sizeof(*resp)); if (context->status_code) - mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n", + mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n", syndrome); + complete(&context->complete); } -void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, - struct mlx5_accel_ipsec_sa *cmd) +static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, + const void *cmd, int cmd_size) { - struct mlx5_ipsec_command_context *context; + struct mlx5_fpga_ipsec_cmd_context *context; struct mlx5_fpga_device *fdev = mdev->fpga; unsigned long flags; - int res = 0; + int res; - BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0); if (!fdev || !fdev->ipsec) return ERR_PTR(-EOPNOTSUPP); - context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (cmd_size & 3) + return ERR_PTR(-EINVAL); + + context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC); if (!context) return ERR_PTR(-ENOMEM); - memcpy(&context->sa, cmd, sizeof(*cmd)); + context->status = MLX5_FPGA_IPSEC_CMD_PENDING; + context->dev = fdev; context->buf.complete = mlx5_fpga_ipsec_send_complete; - context->buf.sg[0].size = sizeof(context->sa); - context->buf.sg[0].data = &context->sa; init_completion(&context->complete); - context->dev = fdev; + memcpy(&context->command, cmd, cmd_size); + context->buf.sg[0].size = cmd_size; + context->buf.sg[0].data = &context->command; + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); list_add_tail(&context->list, &fdev->ipsec->pending_cmds); spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - context->status = MLX5_FPGA_IPSEC_SACMD_PENDING; - res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); if (res) { mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n", @@ -214,47 +254,103 @@ void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, return context; } -int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx) +static int mlx5_fpga_ipsec_cmd_wait(void *ctx) { - struct mlx5_ipsec_command_context *context = ctx; + struct mlx5_fpga_ipsec_cmd_context *context = ctx; + unsigned long timeout = + msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC); int res; - res = wait_for_completion_killable(&context->complete); - if (res) { + res = wait_for_completion_timeout(&context->complete, timeout); + if (!res) { mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n"); - return -EINTR; + return -ETIMEDOUT; } - if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE) + if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE) res = context->status_code; else res = -EIO; - kfree(context); return res; } +static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec) +{ + if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command)) + return true; + return false; +} + +static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev, + struct mlx5_ifc_fpga_ipsec_sa *hw_sa, + int opcode) +{ + struct mlx5_core_dev *dev = fdev->mdev; + struct mlx5_ifc_fpga_ipsec_sa *sa; + struct mlx5_fpga_ipsec_cmd_context *cmd_context; + size_t sa_cmd_size; + int err; + + hw_sa->ipsec_sa_v1.cmd = htonl(opcode); + if (is_v2_sadb_supported(fdev->ipsec)) + sa_cmd_size = sizeof(*hw_sa); + else + sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1); + + cmd_context = (struct mlx5_fpga_ipsec_cmd_context *) + mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size); + if (IS_ERR(cmd_context)) + return PTR_ERR(cmd_context); + + err = mlx5_fpga_ipsec_cmd_wait(cmd_context); + if (err) + goto out; + + sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command; + if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) { + mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n", + ntohl(sa->ipsec_sa_v1.sw_sa_handle), + ntohl(cmd_context->resp.sw_sa_handle)); + err = -EIO; + } + +out: + kfree(cmd_context); + return err; +} + u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = mdev->fpga; u32 ret = 0; - if (mlx5_fpga_is_ipsec_device(mdev)) - ret |= MLX5_ACCEL_IPSEC_DEVICE; - else + if (mlx5_fpga_is_ipsec_device(mdev)) { + ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE; + ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA; + } else { return ret; + } if (!fdev->ipsec) return ret; if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp)) - ret |= MLX5_ACCEL_IPSEC_ESP; + ret |= MLX5_ACCEL_IPSEC_CAP_ESP; if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6)) - ret |= MLX5_ACCEL_IPSEC_IPV6; + ret |= MLX5_ACCEL_IPSEC_CAP_IPV6; if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso)) - ret |= MLX5_ACCEL_IPSEC_LSO; + ret |= MLX5_ACCEL_IPSEC_CAP_LSO; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer)) + ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) { + ret |= MLX5_ACCEL_IPSEC_CAP_ESN; + ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; + } return ret; } @@ -318,6 +414,829 @@ out: return ret; } +static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) +{ + struct mlx5_fpga_ipsec_cmd_context *context; + struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0}; + int err; + + cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); + cmd.flags = htonl(flags); + context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); + if (IS_ERR(context)) { + err = PTR_ERR(context); + goto out; + } + + err = mlx5_fpga_ipsec_cmd_wait(context); + if (err) + goto out; + + if ((context->resp.flags & cmd.flags) != cmd.flags) { + mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n", + cmd.flags, + context->resp.flags); + err = -EIO; + } + +out: + return err; +} + +static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev) +{ + u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev); + u32 flags = 0; + + if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER) + flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER; + + return mlx5_fpga_ipsec_set_caps(mdev, flags); +} + +static void +mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, + struct mlx5_ifc_fpga_ipsec_sa *hw_sa) +{ + const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm; + + /* key */ + memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key, + aes_gcm->key_len / 8); + /* Duplicate 128 bit key twice according to HW layout */ + if (aes_gcm->key_len == 128) + memcpy(&hw_sa->ipsec_sa_v1.key_enc[16], + aes_gcm->aes_key, aes_gcm->key_len / 8); + + /* salt and seq_iv */ + memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv, + sizeof(aes_gcm->seq_iv)); + memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt, + sizeof(aes_gcm->salt)); + + /* esn */ + if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN; + hw_sa->ipsec_sa_v1.flags |= + (xfrm_attrs->flags & + MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? + MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; + hw_sa->esn = htonl(xfrm_attrs->esn); + } else { + hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN; + hw_sa->ipsec_sa_v1.flags &= + ~(xfrm_attrs->flags & + MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? + MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; + hw_sa->esn = 0; + } + + /* rx handle */ + hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle); + + /* enc mode */ + switch (aes_gcm->key_len) { + case 128: + hw_sa->ipsec_sa_v1.enc_mode = + MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128; + break; + case 256: + hw_sa->ipsec_sa_v1.enc_mode = + MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128; + break; + } + + /* flags */ + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID | + MLX5_FPGA_IPSEC_SA_SPI_EN | + MLX5_FPGA_IPSEC_SA_IP_ESP; + + if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT) + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX; + else + hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX; +} + +static void +mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6, + struct mlx5_ifc_fpga_ipsec_sa *hw_sa) +{ + mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa); + + /* IPs */ + memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip)); + memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip)); + + /* SPI */ + hw_sa->ipsec_sa_v1.spi = spi; + + /* flags */ + if (is_ipv6) + hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6; +} + +static bool is_full_mask(const void *p, size_t len) +{ + WARN_ON(len % 4); + + return !memchr_inv(p, 0xff, len); +} + +static bool validate_fpga_full_mask(struct mlx5_core_dev *dev, + const u32 *match_c, + const u32 *match_v) +{ + const void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + match_c, + misc_parameters); + const void *headers_c = MLX5_ADDR_OF(fte_match_param, + match_c, + outer_headers); + const void *headers_v = MLX5_ADDR_OF(fte_match_param, + match_v, + outer_headers); + + if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) { + const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, + ipv4)) || + !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, + ipv4))) + return false; + } else { + const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6); + const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, + headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6); + + if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)) || + !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6))) + return false; + } + + if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + outer_esp_spi), + MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi))) + return false; + + return true; +} + +static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev, + u8 match_criteria_enable, + const u32 *match_c, + const u32 *match_v) +{ + u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev); + bool ipv6_flow; + + ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v); + + if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) || + mlx5_fs_is_outer_udp_flow(match_c, match_v) || + mlx5_fs_is_outer_tcp_flow(match_c, match_v) || + mlx5_fs_is_vxlan_flow(match_c) || + !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) || + ipv6_flow)) + return false; + + if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE)) + return false; + + if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) && + mlx5_fs_is_outer_ipsec_flow(match_c)) + return false; + + if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) && + ipv6_flow) + return false; + + if (!validate_fpga_full_mask(dev, match_c, match_v)) + return false; + + return true; +} + +static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, + u8 match_criteria_enable, + const u32 *match_c, + const u32 *match_v, + struct mlx5_flow_act *flow_act) +{ + const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) || + MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0); + bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) || + MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0); + int ret; + + ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c, + match_v); + if (!ret) + return ret; + + if (is_dmac || is_smac || + (match_criteria_enable & + ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || + (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || + flow_act->has_flow_tag) + return false; + + return true; +} + +void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *accel_xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; + struct mlx5_fpga_esp_xfrm *fpga_xfrm = + container_of(accel_xfrm, typeof(*fpga_xfrm), + accel_xfrm); + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + int opcode, err; + void *context; + + /* alloc SA */ + sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL); + if (!sa_ctx) + return ERR_PTR(-ENOMEM); + + sa_ctx->dev = mdev; + + /* build candidate SA */ + mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs, + saddr, daddr, spi, is_ipv6, + &sa_ctx->hw_sa); + + mutex_lock(&fpga_xfrm->lock); + + if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */ + /* all rules must be with same IPs and SPI */ + if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa, + sizeof(sa_ctx->hw_sa))) { + context = ERR_PTR(-EINVAL); + goto exists; + } + + ++fpga_xfrm->num_rules; + context = fpga_xfrm->sa_ctx; + goto exists; + } + + /* This is unbounded fpga_xfrm, try to add to hash */ + mutex_lock(&fipsec->sa_hash_lock); + + err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash, + rhash_sa); + if (err) { + /* Can't bound different accel_xfrm to already existing sa_ctx. + * This is because we can't support multiple ketmats for + * same IPs and SPI + */ + context = ERR_PTR(-EEXIST); + goto unlock_hash; + } + + /* Bound accel_xfrm to sa_ctx */ + opcode = is_v2_sadb_supported(fdev->ipsec) ? + MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 : + MLX5_FPGA_IPSEC_CMD_OP_ADD_SA; + err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); + sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; + if (err) { + context = ERR_PTR(err); + goto delete_hash; + } + + mutex_unlock(&fipsec->sa_hash_lock); + + ++fpga_xfrm->num_rules; + fpga_xfrm->sa_ctx = sa_ctx; + sa_ctx->fpga_xfrm = fpga_xfrm; + + mutex_unlock(&fpga_xfrm->lock); + + return sa_ctx; + +delete_hash: + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, + rhash_sa)); +unlock_hash: + mutex_unlock(&fipsec->sa_hash_lock); + +exists: + mutex_unlock(&fpga_xfrm->lock); + kfree(sa_ctx); + return context; +} + +static void * +mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, + struct fs_fte *fte, + bool is_egress) +{ + struct mlx5_accel_esp_xfrm *accel_xfrm; + __be32 saddr[4], daddr[4], spi; + struct mlx5_flow_group *fg; + bool is_ipv6 = false; + + fs_get_obj(fg, fte->node.parent); + /* validate */ + if (is_egress && + !mlx5_is_fpga_egress_ipsec_rule(mdev, + fg->mask.match_criteria_enable, + fg->mask.match_criteria, + fte->val, + &fte->action)) + return ERR_PTR(-EINVAL); + else if (!mlx5_is_fpga_ipsec_rule(mdev, + fg->mask.match_criteria_enable, + fg->mask.match_criteria, + fte->val)) + return ERR_PTR(-EINVAL); + + /* get xfrm context */ + accel_xfrm = + (struct mlx5_accel_esp_xfrm *)fte->action.esp_id; + + /* IPs */ + if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria, + fte->val)) { + memcpy(&saddr[3], + MLX5_ADDR_OF(fte_match_set_lyr_2_4, + fte->val, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + sizeof(saddr[3])); + memcpy(&daddr[3], + MLX5_ADDR_OF(fte_match_set_lyr_2_4, + fte->val, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + sizeof(daddr[3])); + } else { + memcpy(saddr, + MLX5_ADDR_OF(fte_match_param, + fte->val, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(saddr)); + memcpy(daddr, + MLX5_ADDR_OF(fte_match_param, + fte->val, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(daddr)); + is_ipv6 = true; + } + + /* SPI */ + spi = MLX5_GET_BE(typeof(spi), + fte_match_param, fte->val, + misc_parameters.outer_esp_spi); + + /* create */ + return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm, + saddr, daddr, + spi, is_ipv6); +} + +static void +mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx) +{ + struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + int opcode = is_v2_sadb_supported(fdev->ipsec) ? + MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 : + MLX5_FPGA_IPSEC_CMD_OP_DEL_SA; + int err; + + err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); + sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; + if (err) { + WARN_ON(err); + return; + } + + mutex_lock(&fipsec->sa_hash_lock); + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, + rhash_sa)); + mutex_unlock(&fipsec->sa_hash_lock); +} + +void mlx5_fpga_ipsec_delete_sa_ctx(void *context) +{ + struct mlx5_fpga_esp_xfrm *fpga_xfrm = + ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm; + + mutex_lock(&fpga_xfrm->lock); + if (!--fpga_xfrm->num_rules) { + mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); + fpga_xfrm->sa_ctx = NULL; + } + mutex_unlock(&fpga_xfrm->lock); +} + +static inline struct mlx5_fpga_ipsec_rule * +_rule_search(struct rb_root *root, struct fs_fte *fte) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct mlx5_fpga_ipsec_rule *rule = + container_of(node, struct mlx5_fpga_ipsec_rule, + node); + + if (rule->fte < fte) + node = node->rb_left; + else if (rule->fte > fte) + node = node->rb_right; + else + return rule; + } + return NULL; +} + +static struct mlx5_fpga_ipsec_rule * +rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte) +{ + struct mlx5_fpga_ipsec_rule *rule; + + mutex_lock(&ipsec_dev->rules_rb_lock); + rule = _rule_search(&ipsec_dev->rules_rb, fte); + mutex_unlock(&ipsec_dev->rules_rb_lock); + + return rule; +} + +static inline int _rule_insert(struct rb_root *root, + struct mlx5_fpga_ipsec_rule *rule) +{ + struct rb_node **new = &root->rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct mlx5_fpga_ipsec_rule *this = + container_of(*new, struct mlx5_fpga_ipsec_rule, + node); + + parent = *new; + if (rule->fte < this->fte) + new = &((*new)->rb_left); + else if (rule->fte > this->fte) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&rule->node, parent, new); + rb_insert_color(&rule->node, root); + + return 0; +} + +static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev, + struct mlx5_fpga_ipsec_rule *rule) +{ + int ret; + + mutex_lock(&ipsec_dev->rules_rb_lock); + ret = _rule_insert(&ipsec_dev->rules_rb, rule); + mutex_unlock(&ipsec_dev->rules_rb_lock); + + return ret; +} + +static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, + struct mlx5_fpga_ipsec_rule *rule) +{ + struct rb_root *root = &ipsec_dev->rules_rb; + + mutex_lock(&ipsec_dev->rules_rb_lock); + rb_erase(&rule->node, root); + mutex_unlock(&ipsec_dev->rules_rb_lock); +} + +static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, + struct mlx5_fpga_ipsec_rule *rule) +{ + _rule_delete(ipsec_dev, rule); + kfree(rule); +} + +struct mailbox_mod { + uintptr_t saved_esp_id; + u32 saved_action; + u32 saved_outer_esp_spi_value; +}; + +static void restore_spec_mailbox(struct fs_fte *fte, + struct mailbox_mod *mbox_mod) +{ + char *misc_params_v = MLX5_ADDR_OF(fte_match_param, + fte->val, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, + mbox_mod->saved_outer_esp_spi_value); + fte->action.action |= mbox_mod->saved_action; + fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id; +} + +static void modify_spec_mailbox(struct mlx5_core_dev *mdev, + struct fs_fte *fte, + struct mailbox_mod *mbox_mod) +{ + char *misc_params_v = MLX5_ADDR_OF(fte_match_param, + fte->val, + misc_parameters); + + mbox_mod->saved_esp_id = fte->action.esp_id; + mbox_mod->saved_action = fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT); + mbox_mod->saved_outer_esp_spi_value = + MLX5_GET(fte_match_set_misc, misc_params_v, + outer_esp_spi); + + fte->action.esp_id = 0; + fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT); + if (!MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0); +} + +static enum fs_flow_table_type egress_to_fs_ft(bool egress) +{ + return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX; +} + +static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id, + bool is_egress) +{ + int (*create_flow_group)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, u32 *in, + unsigned int *group_id) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group; + char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria.misc_parameters); + u32 saved_outer_esp_spi_mask; + u8 match_criteria_enable; + int ret; + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + return create_flow_group(dev, ft, in, group_id); + + match_criteria_enable = + MLX5_GET(create_flow_group_in, in, match_criteria_enable); + saved_outer_esp_spi_mask = + MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); + if (!match_criteria_enable || !saved_outer_esp_spi_mask) + return create_flow_group(dev, ft, in, group_id); + + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0); + + if (!(*misc_params_c) && + !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS); + + ret = create_flow_group(dev, ft, in, group_id); + + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask); + MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable); + + return ret; +} + +static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte, + bool is_egress) +{ + int (*create_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte; + struct mlx5_fpga_device *fdev = dev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + struct mlx5_fpga_ipsec_rule *rule; + bool is_esp = fte->action.esp_id; + struct mailbox_mod mbox_mod; + int ret; + + if (!is_esp || + !(fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) + return create_fte(dev, ft, fg, fte); + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; + + rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress); + if (IS_ERR(rule->ctx)) { + int err = PTR_ERR(rule->ctx); + kfree(rule); + return err; + } + + rule->fte = fte; + WARN_ON(rule_insert(fipsec, rule)); + + modify_spec_mailbox(dev, fte, &mbox_mod); + ret = create_fte(dev, ft, fg, fte); + restore_spec_mailbox(fte, &mbox_mod); + if (ret) { + _rule_delete(fipsec, rule); + mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); + kfree(rule); + } + + return ret; +} + +static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte, + bool is_egress) +{ + int (*update_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte; + bool is_esp = fte->action.esp_id; + struct mailbox_mod mbox_mod; + int ret; + + if (!is_esp || + !(fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) + return update_fte(dev, ft, group_id, modify_mask, fte); + + modify_spec_mailbox(dev, fte, &mbox_mod); + ret = update_fte(dev, ft, group_id, modify_mask, fte); + restore_spec_mailbox(fte, &mbox_mod); + + return ret; +} + +static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte, + bool is_egress) +{ + int (*delete_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) = + mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte; + struct mlx5_fpga_device *fdev = dev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + struct mlx5_fpga_ipsec_rule *rule; + bool is_esp = fte->action.esp_id; + struct mailbox_mod mbox_mod; + int ret; + + if (!is_esp || + !(fte->action.action & + (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) + return delete_fte(dev, ft, fte); + + rule = rule_search(fipsec, fte); + if (!rule) + return -ENOENT; + + mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); + rule_delete(fipsec, rule); + + modify_spec_mailbox(dev, fte, &mbox_mod); + ret = delete_fte(dev, ft, fte); + restore_spec_mailbox(fte, &mbox_mod); + + return ret; +} + +static int +mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true); +} + +static int +mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true); +} + +static int +mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte, + true); +} + +static int +mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_delete_fte(dev, ft, fte, true); +} + +static int +mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false); +} + +static int +mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false); +} + +static int +mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte, + false); +} + +static int +mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return fpga_ipsec_fs_delete_fte(dev, ft, fte, false); +} + +static struct mlx5_flow_cmds fpga_ipsec_ingress; +static struct mlx5_flow_cmds fpga_ipsec_egress; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) +{ + switch (type) { + case FS_FT_NIC_RX: + return &fpga_ipsec_ingress; + case FS_FT_NIC_TX: + return &fpga_ipsec_egress; + default: + WARN_ON(true); + return NULL; + } +} + int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) { struct mlx5_fpga_conn_attr init_attr = {0}; @@ -332,6 +1251,8 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) if (!fdev->ipsec) return -ENOMEM; + fdev->ipsec->fdev = fdev; + err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps), fdev->ipsec->caps); if (err) { @@ -355,14 +1276,47 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) goto error; } fdev->ipsec->conn = conn; + + err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa); + if (err) + goto err_destroy_conn; + mutex_init(&fdev->ipsec->sa_hash_lock); + + fdev->ipsec->rules_rb = RB_ROOT; + mutex_init(&fdev->ipsec->rules_rb_lock); + + err = mlx5_fpga_ipsec_enable_supported_caps(mdev); + if (err) { + mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n", + err); + goto err_destroy_hash; + } + return 0; +err_destroy_hash: + rhashtable_destroy(&fdev->ipsec->sa_hash); + +err_destroy_conn: + mlx5_fpga_sbu_conn_destroy(conn); + error: kfree(fdev->ipsec); fdev->ipsec = NULL; return err; } +static void destroy_rules_rb(struct rb_root *root) +{ + struct mlx5_fpga_ipsec_rule *r, *tmp; + + rbtree_postorder_for_each_entry_safe(r, tmp, root, node) { + rb_erase(&r->node, root); + mlx5_fpga_ipsec_delete_sa_ctx(r->ctx); + kfree(r); + } +} + void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = mdev->fpga; @@ -370,7 +1324,209 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) if (!mlx5_fpga_is_ipsec_device(mdev)) return; + destroy_rules_rb(&fdev->ipsec->rules_rb); + rhashtable_destroy(&fdev->ipsec->sa_hash); + mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn); kfree(fdev->ipsec); fdev->ipsec = NULL; } + +void mlx5_fpga_ipsec_build_fs_cmds(void) +{ + /* ingress */ + fpga_ipsec_ingress.create_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table; + fpga_ipsec_ingress.destroy_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table; + fpga_ipsec_ingress.modify_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table; + fpga_ipsec_ingress.create_flow_group = + mlx5_fpga_ipsec_fs_create_flow_group_ingress; + fpga_ipsec_ingress.destroy_flow_group = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group; + fpga_ipsec_ingress.create_fte = + mlx5_fpga_ipsec_fs_create_fte_ingress; + fpga_ipsec_ingress.update_fte = + mlx5_fpga_ipsec_fs_update_fte_ingress; + fpga_ipsec_ingress.delete_fte = + mlx5_fpga_ipsec_fs_delete_fte_ingress; + fpga_ipsec_ingress.update_root_ft = + mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft; + + /* egress */ + fpga_ipsec_egress.create_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table; + fpga_ipsec_egress.destroy_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table; + fpga_ipsec_egress.modify_flow_table = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table; + fpga_ipsec_egress.create_flow_group = + mlx5_fpga_ipsec_fs_create_flow_group_egress; + fpga_ipsec_egress.destroy_flow_group = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group; + fpga_ipsec_egress.create_fte = + mlx5_fpga_ipsec_fs_create_fte_egress; + fpga_ipsec_egress.update_fte = + mlx5_fpga_ipsec_fs_update_fte_egress; + fpga_ipsec_egress.delete_fte = + mlx5_fpga_ipsec_fs_delete_fte_egress; + fpga_ipsec_egress.update_root_ft = + mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft; +} + +static int +mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + if (attrs->tfc_pad) { + mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n"); + return -EOPNOTSUPP; + } + + if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) { + mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) { + mlx5_core_err(mdev, "Only aes gcm keymat is supported\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat.aes_gcm.iv_algo != + MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) { + mlx5_core_err(mdev, "Only iv sequence algo is supported\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat.aes_gcm.icv_len != 128) { + mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); + return -EOPNOTSUPP; + } + + if (attrs->keymat.aes_gcm.key_len != 128 && + attrs->keymat.aes_gcm.key_len != 256) { + mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EOPNOTSUPP; + } + + if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) && + (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps, + v2_command))) { + mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct mlx5_accel_esp_xfrm * +mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags) +{ + struct mlx5_fpga_esp_xfrm *fpga_xfrm; + + if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) { + mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n"); + return ERR_PTR(-EINVAL); + } + + if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL); + if (!fpga_xfrm) + return ERR_PTR(-ENOMEM); + + mutex_init(&fpga_xfrm->lock); + memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs, + sizeof(fpga_xfrm->accel_xfrm.attrs)); + + return &fpga_xfrm->accel_xfrm; +} + +void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) +{ + struct mlx5_fpga_esp_xfrm *fpga_xfrm = + container_of(xfrm, struct mlx5_fpga_esp_xfrm, + accel_xfrm); + /* assuming no sa_ctx are connected to this xfrm_ctx */ + kfree(fpga_xfrm); +} + +int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = xfrm->mdev; + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; + struct mlx5_fpga_esp_xfrm *fpga_xfrm; + struct mlx5_ifc_fpga_ipsec_sa org_hw_sa; + + int err = 0; + + if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) + return 0; + + if (!mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); + return -EOPNOTSUPP; + } + + if (is_v2_sadb_supported(fipsec)) { + mlx5_core_warn(mdev, "Modify esp is not supported\n"); + return -EOPNOTSUPP; + } + + fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm); + + mutex_lock(&fpga_xfrm->lock); + + if (!fpga_xfrm->sa_ctx) + /* Unbounded xfrm, chane only sw attrs */ + goto change_sw_xfrm_attrs; + + /* copy original hw sa */ + memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa)); + mutex_lock(&fipsec->sa_hash_lock); + /* remove original hw sa from hash */ + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, rhash_sa)); + /* update hw_sa with new xfrm attrs*/ + mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs, + &fpga_xfrm->sa_ctx->hw_sa); + /* try to insert new hw_sa to hash */ + err = rhashtable_insert_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, rhash_sa); + if (err) + goto rollback_sa; + + /* modify device with new hw_sa */ + err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa, + MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2); + fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; + if (err) + WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, + rhash_sa)); +rollback_sa: + if (err) { + /* return original hw_sa to hash */ + memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa, + sizeof(org_hw_sa)); + WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash, + &fpga_xfrm->sa_ctx->hash, + rhash_sa)); + } + mutex_unlock(&fipsec->sa_hash_lock); + +change_sw_xfrm_attrs: + if (!err) + memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs)); + mutex_unlock(&fpga_xfrm->lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h index 26a3e4b56972..2b5e63b0d4d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -35,33 +35,38 @@ #define __MLX5_FPGA_IPSEC_H__ #include "accel/ipsec.h" +#include "fs_cmd.h" #ifdef CONFIG_MLX5_FPGA -void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, - struct mlx5_accel_ipsec_sa *cmd); -int mlx5_fpga_ipsec_sa_cmd_wait(void *context); - u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, unsigned int counters_count); +void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *accel_xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6); +void mlx5_fpga_ipsec_delete_sa_ctx(void *context); + int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev); void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev); +void mlx5_fpga_ipsec_build_fs_cmds(void); -#else +struct mlx5_accel_esp_xfrm * +mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags); +void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); +int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs); -static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, - struct mlx5_accel_ipsec_sa *cmd) -{ - return ERR_PTR(-EOPNOTSUPP); -} +const struct mlx5_flow_cmds * +mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context) -{ - return -EOPNOTSUPP; -} +#else static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) { @@ -80,6 +85,20 @@ static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, return 0; } +static inline void * +mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *accel_xfrm, + const __be32 saddr[4], + const __be32 daddr[4], + const __be32 spi, bool is_ipv6) +{ + return NULL; +} + +static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context) +{ +} + static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) { return 0; @@ -89,6 +108,35 @@ static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) { } +static inline void mlx5_fpga_ipsec_build_fs_cmds(void) +{ +} + +static inline struct mlx5_accel_esp_xfrm * +mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) +{ +} + +static inline int +mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + return -EOPNOTSUPP; +} + +static inline const struct mlx5_flow_cmds * +mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) +{ + return mlx5_fs_cmd_get_default(type); +} + #endif /* CONFIG_MLX5_FPGA */ #endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 881e2e55840c..ef5afd7c9325 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -39,9 +39,81 @@ #include "mlx5_core.h" #include "eswitch.h" -int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, u32 underlay_qpn, - bool disconnect) +static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, + enum fs_flow_table_op_mod op_mod, + enum fs_flow_table_type type, + unsigned int level, + unsigned int log_size, + struct mlx5_flow_table *next_ft, + unsigned int *table_id, u32 flags) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + return 0; +} + +static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id) +{ + return 0; +} + +static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; @@ -71,12 +143,14 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, - u16 vport, - enum fs_flow_table_op_mod op_mod, - enum fs_flow_table_type type, unsigned int level, - unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id, u32 flags) +static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, + enum fs_flow_table_op_mod op_mod, + enum fs_flow_table_type type, + unsigned int level, + unsigned int log_size, + struct mlx5_flow_table *next_ft, + unsigned int *table_id, u32 flags) { int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; @@ -125,8 +199,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, return err; } -int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft) +static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) { u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0}; @@ -143,9 +217,9 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - struct mlx5_flow_table *next_ft) +static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) { u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0}; @@ -188,10 +262,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - u32 *in, - unsigned int *group_id) +static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) { u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -213,9 +287,9 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, return err; } -int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned int group_id) +static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id) { u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0}; @@ -243,7 +317,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; struct mlx5_flow_rule *dst; - void *in_flow_context; + void *in_flow_context, *vlan; void *in_match_value; void *in_dests; u32 *in; @@ -266,16 +340,25 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); - MLX5_SET(flow_context, in_flow_context, action, fte->action); - MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id); - MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, action, fte->action.action); + MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan.ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan.vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan.prio); + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, sizeof(fte->val)); in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); - if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { @@ -301,7 +384,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, list_size); } - if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, log_max_flow_counter, ft->type)); @@ -332,19 +415,21 @@ err_out: return err; } -int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned group_id, - struct fs_fte *fte) +static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) { + unsigned int group_id = group->id; + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); } -int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned group_id, - int modify_mask, - struct fs_fte *fte) +static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte) { int opmod; int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, @@ -357,9 +442,9 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); } -int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned int index) +static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte) { u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0}; u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0}; @@ -367,7 +452,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); MLX5_SET(delete_fte_in, in, table_type, ft->type); MLX5_SET(delete_fte_in, in, table_id, ft->id); - MLX5_SET(delete_fte_in, in, flow_index, index); + MLX5_SET(delete_fte_in, in, flow_index, fte->index); if (ft->vport) { MLX5_SET(delete_fte_in, in, vport_number, ft->vport); MLX5_SET(delete_fte_in, in, other_vport, 1); @@ -610,3 +695,53 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +static const struct mlx5_flow_cmds mlx5_flow_cmds = { + .create_flow_table = mlx5_cmd_create_flow_table, + .destroy_flow_table = mlx5_cmd_destroy_flow_table, + .modify_flow_table = mlx5_cmd_modify_flow_table, + .create_flow_group = mlx5_cmd_create_flow_group, + .destroy_flow_group = mlx5_cmd_destroy_flow_group, + .create_fte = mlx5_cmd_create_fte, + .update_fte = mlx5_cmd_update_fte, + .delete_fte = mlx5_cmd_delete_fte, + .update_root_ft = mlx5_cmd_update_root_ft, +}; + +static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { + .create_flow_table = mlx5_cmd_stub_create_flow_table, + .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table, + .modify_flow_table = mlx5_cmd_stub_modify_flow_table, + .create_flow_group = mlx5_cmd_stub_create_flow_group, + .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group, + .create_fte = mlx5_cmd_stub_create_fte, + .update_fte = mlx5_cmd_stub_update_fte, + .delete_fte = mlx5_cmd_stub_delete_fte, + .update_root_ft = mlx5_cmd_stub_update_root_ft, +}; + +static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) +{ + return &mlx5_flow_cmds; +} + +static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void) +{ + return &mlx5_flow_cmd_stubs; +} + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type) +{ + switch (type) { + case FS_FT_NIC_RX: + case FS_FT_ESW_EGRESS_ACL: + case FS_FT_ESW_INGRESS_ACL: + case FS_FT_FDB: + case FS_FT_SNIFFER_RX: + case FS_FT_SNIFFER_TX: + return mlx5_fs_cmd_get_fw_cmds(); + case FS_FT_NIC_TX: + default: + return mlx5_fs_cmd_get_stub_cmds(); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 71e2d0f37ad9..6228ba7bfa1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -33,46 +33,52 @@ #ifndef _MLX5_FS_CMD_ #define _MLX5_FS_CMD_ -int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, - u16 vport, - enum fs_flow_table_op_mod op_mod, - enum fs_flow_table_type type, unsigned int level, - unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id, u32 flags); +#include "fs_core.h" -int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft); +struct mlx5_flow_cmds { + int (*create_flow_table)(struct mlx5_core_dev *dev, + u16 vport, + enum fs_flow_table_op_mod op_mod, + enum fs_flow_table_type type, + unsigned int level, unsigned int log_size, + struct mlx5_flow_table *next_ft, + unsigned int *table_id, u32 flags); + int (*destroy_flow_table)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); -int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - struct mlx5_flow_table *next_ft); + int (*modify_flow_table)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); -int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - u32 *in, unsigned int *group_id); + int (*create_flow_group)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id); -int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned int group_id); + int (*destroy_flow_group)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id); -int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned group_id, - struct fs_fte *fte); + int (*create_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte); -int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned group_id, - int modify_mask, - struct fs_fte *fte); + int (*update_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id, + int modify_mask, + struct fs_fte *fte); -int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - unsigned int index); + int (*delete_fte)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_fte *fte); -int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, u32 underlay_qpn, - bool disconnect); + int (*update_root_ft)(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect); +}; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); @@ -90,4 +96,6 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 31fc2cfac3b3..de51e7c39bc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -37,6 +37,8 @@ #include "fs_core.h" #include "fs_cmd.h" #include "diag/fs_tracepoint.h" +#include "accel/ipsec.h" +#include "fpga/ipsec.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -425,15 +427,17 @@ static void del_sw_prio(struct fs_node *node) static void del_hw_flow_table(struct fs_node *node) { + struct mlx5_flow_root_namespace *root; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; int err; fs_get_obj(ft, node); dev = get_dev(&ft->node); + root = find_root(&ft->node); if (node->active) { - err = mlx5_cmd_destroy_flow_table(dev, ft); + err = root->cmds->destroy_flow_table(dev, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); } @@ -454,6 +458,7 @@ static void del_sw_flow_table(struct fs_node *node) static void del_sw_hw_rule(struct fs_node *node) { + struct mlx5_flow_root_namespace *root; struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; @@ -477,19 +482,20 @@ static void del_sw_hw_rule(struct fs_node *node) if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && --fte->dests_size) { modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); - fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; update_fte = true; goto out; } - if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && + if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST), update_fte = true; } out: + root = find_root(&ft->node); if (update_fte && fte->dests_size) { - err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte); + err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte); if (err) mlx5_core_warn(dev, "%s can't del rule fg id=%d fte_index=%d\n", @@ -500,6 +506,7 @@ out: static void del_hw_fte(struct fs_node *node) { + struct mlx5_flow_root_namespace *root; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct mlx5_core_dev *dev; @@ -512,9 +519,9 @@ static void del_hw_fte(struct fs_node *node) trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); + root = find_root(&ft->node); if (node->active) { - err = mlx5_cmd_delete_fte(dev, ft, - fte->index); + err = root->cmds->delete_fte(dev, ft, fte); if (err) mlx5_core_warn(dev, "flow steering can't delete fte in index %d of flow group id %d\n", @@ -542,6 +549,7 @@ static void del_sw_fte(struct fs_node *node) static void del_hw_flow_group(struct fs_node *node) { + struct mlx5_flow_root_namespace *root; struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; @@ -551,7 +559,8 @@ static void del_hw_flow_group(struct fs_node *node) dev = get_dev(&ft->node); trace_mlx5_fs_del_fg(fg); - if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + root = find_root(&ft->node); + if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); } @@ -615,10 +624,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; - fte->flow_tag = flow_act->flow_tag; - fte->action = flow_act->action; - fte->encap_id = flow_act->encap_id; - fte->modify_id = flow_act->modify_id; + fte->action = *flow_act; tree_init_node(&fte->node, del_hw_fte, del_sw_fte); @@ -797,15 +803,14 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev, struct fs_prio *prio, struct mlx5_flow_table *ft) { + struct mlx5_flow_root_namespace *root = find_root(&prio->node); struct mlx5_flow_table *iter; int i = 0; int err; fs_for_each_ft(iter, prio) { i++; - err = mlx5_cmd_modify_flow_table(dev, - iter, - ft); + err = root->cmds->modify_flow_table(dev, iter, ft); if (err) { mlx5_core_warn(dev, "Failed to modify flow table %d\n", iter->id); @@ -853,12 +858,12 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio if (list_empty(&root->underlay_qpns)) { /* Don't set any QPN (zero) in case QPN list is empty */ qpn = 0; - err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false); + err = root->cmds->update_root_ft(root->dev, ft, qpn, false); } else { list_for_each_entry(uqp, &root->underlay_qpns, list) { qpn = uqp->qpn; - err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, - false); + err = root->cmds->update_root_ft(root->dev, ft, + qpn, false); if (err) break; } @@ -877,6 +882,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, struct mlx5_flow_destination *dest) { + struct mlx5_flow_root_namespace *root; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct fs_fte *fte; @@ -884,17 +890,16 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, int err = 0; fs_get_obj(fte, rule->node.parent); - if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; down_write_ref_node(&fte->node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); memcpy(&rule->dest_attr, dest, sizeof(*dest)); - err = mlx5_cmd_update_fte(get_dev(&ft->node), - ft, fg->id, - modify_mask, - fte); + root = find_root(&ft->node); + err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, + modify_mask, fte); up_write_ref_node(&fte->node); return err; @@ -1035,9 +1040,9 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); - err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, - ft->level, log_table_sz, next_ft, &ft->id, - ft->flags); + err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod, + ft->type, ft->level, log_table_sz, + next_ft, &ft->id, ft->flags); if (err) goto free_ft; @@ -1053,7 +1058,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa mutex_unlock(&root->chain_lock); return ft; destroy_ft: - mlx5_cmd_destroy_flow_table(root->dev, ft); + root->cmds->destroy_flow_table(root->dev, ft); free_ft: kfree(ft); unlock_root: @@ -1125,6 +1130,7 @@ EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { + struct mlx5_flow_root_namespace *root = find_root(&ft->node); void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, fg_in, match_criteria); u8 match_criteria_enable = MLX5_GET(create_flow_group_in, @@ -1152,7 +1158,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (IS_ERR(fg)) return fg; - err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id); if (err) { tree_put_node(&fg->node); return ERR_PTR(err); @@ -1275,6 +1281,7 @@ add_rule_fte(struct fs_fte *fte, int dest_num, bool update_action) { + struct mlx5_flow_root_namespace *root; struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; int modify_mask = 0; @@ -1290,12 +1297,13 @@ add_rule_fte(struct fs_fte *fte, modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); fs_get_obj(ft, fg->node.parent); + root = find_root(&fg->node); if (!(fte->status & FS_FTE_STATUS_EXISTING)) - err = mlx5_cmd_create_fte(get_dev(&ft->node), - ft, fg->id, fte); + err = root->cmds->create_fte(get_dev(&ft->node), + ft, fg, fte); else - err = mlx5_cmd_update_fte(get_dev(&ft->node), - ft, fg->id, modify_mask, fte); + err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, + modify_mask, fte); if (err) goto free_handle; @@ -1360,6 +1368,7 @@ out: static int create_auto_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_group *fg) { + struct mlx5_flow_root_namespace *root = find_root(&ft->node); struct mlx5_core_dev *dev = get_dev(&ft->node); int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); void *match_criteria_addr; @@ -1380,7 +1389,7 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft, memcpy(match_criteria_addr, fg->mask.match_criteria, sizeof(fg->mask.match_criteria)); - err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id); + err = root->cmds->create_flow_group(dev, ft, in, &fg->id); if (!err) { fg->node.active = true; trace_mlx5_fs_add_fg(fg); @@ -1430,7 +1439,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_DECAP | - MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) return true; return false; @@ -1438,16 +1449,17 @@ static bool check_conflicting_actions(u32 action1, u32 action2) static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) { - if (check_conflicting_actions(flow_act->action, fte->action)) { + if (check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), "Found two FTEs with conflicting actions\n"); return -EEXIST; } - if (fte->flow_tag != flow_act->flow_tag) { + if (flow_act->has_flow_tag && + fte->action.flow_tag != flow_act->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->flow_tag, + fte->action.flow_tag, flow_act->flow_tag); return -EEXIST; } @@ -1471,12 +1483,12 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, if (ret) return ERR_PTR(ret); - old_action = fte->action; - fte->action |= flow_act->action; + old_action = fte->action.action; + fte->action.action |= flow_act->action; handle = add_rule_fte(fte, fg, dest, dest_num, old_action != flow_act->action); if (IS_ERR(handle)) { - fte->action = old_action; + fte->action.action = old_action; return handle; } trace_mlx5_fs_set_fte(fte, false); @@ -1637,7 +1649,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, list_for_each_entry(iter, match_head, list) { nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); - ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL); } search_again_locked: @@ -1919,7 +1930,6 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) return 0; new_root_ft = find_next_ft(ft); - if (!new_root_ft) { root->root_ft = NULL; return 0; @@ -1928,13 +1938,14 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) if (list_empty(&root->underlay_qpns)) { /* Don't set any QPN (zero) in case QPN list is empty */ qpn = 0; - err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn, - false); + err = root->cmds->update_root_ft(root->dev, new_root_ft, + qpn, false); } else { list_for_each_entry(uqp, &root->underlay_qpns, list) { qpn = uqp->qpn; - err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, - qpn, false); + err = root->cmds->update_root_ft(root->dev, + new_root_ft, qpn, + false); if (err) break; } @@ -2046,6 +2057,11 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return &steering->sniffer_tx_root_ns->ns; else return NULL; + case MLX5_FLOW_NAMESPACE_EGRESS: + if (steering->egress_root_ns) + return &steering->egress_root_ns->ns; + else + return NULL; default: return NULL; } @@ -2236,13 +2252,18 @@ static int init_root_tree(struct mlx5_flow_steering *steering, return 0; } -static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering, - enum fs_flow_table_type - table_type) +static struct mlx5_flow_root_namespace +*create_root_ns(struct mlx5_flow_steering *steering, + enum fs_flow_table_type table_type) { + const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type); struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_namespace *ns; + if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && + (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX)) + cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type); + /* Create the root namespace */ root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL); if (!root_ns) @@ -2250,6 +2271,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering root_ns->dev = steering->dev; root_ns->table_type = table_type; + root_ns->cmds = cmds; INIT_LIST_HEAD(&root_ns->underlay_qpns); @@ -2408,6 +2430,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_root_ns(steering->fdb_root_ns); cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); + cleanup_root_ns(steering->egress_root_ns); mlx5_cleanup_fc_stats(dev); kmem_cache_destroy(steering->ftes_cache); kmem_cache_destroy(steering->fgs_cache); @@ -2553,6 +2576,20 @@ cleanup_root_ns: return err; } +static int init_egress_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->egress_root_ns = create_root_ns(steering, + FS_FT_NIC_TX); + if (!steering->egress_root_ns) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + int mlx5_init_fs(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering; @@ -2618,6 +2655,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } + if (MLX5_IPSEC_DEV(dev)) { + err = init_egress_root_ns(steering); + if (err) + goto err; + } + return 0; err: mlx5_cleanup_fs(dev); @@ -2641,7 +2684,8 @@ int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) goto update_ft_fail; } - err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false); + err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn, + false); if (err) { mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", underlay_qpn, err); @@ -2684,7 +2728,8 @@ int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) goto out; } - err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true); + err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn, + true); if (err) mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", underlay_qpn, err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 05262708f14b..e26d3e9d5f9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -48,6 +48,7 @@ enum fs_node_type { enum fs_flow_table_type { FS_FT_NIC_RX = 0x0, + FS_FT_NIC_TX = 0x1, FS_FT_ESW_EGRESS_ACL = 0x2, FS_FT_ESW_INGRESS_ACL = 0x3, FS_FT_FDB = 0X4, @@ -75,6 +76,7 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace **esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; struct mlx5_flow_root_namespace *sniffer_rx_root_ns; + struct mlx5_flow_root_namespace *egress_root_ns; }; struct fs_node { @@ -174,11 +176,8 @@ struct fs_fte { struct fs_node node; u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; - u32 flow_tag; u32 index; - u32 action; - u32 encap_id; - u32 modify_id; + struct mlx5_flow_act action; enum fs_fte_status status; struct mlx5_fc *counter; struct rhash_head hash; @@ -224,6 +223,7 @@ struct mlx5_flow_root_namespace { /* Should be held when chaining flow tables */ struct mutex chain_lock; struct list_head underlay_qpns; + const struct mlx5_flow_cmds *cmds; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 9d11e92fb541..70066975f1b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -183,6 +183,9 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, debug)) + mlx5_core_get_caps(dev, MLX5_CAP_DEBUG); + if (MLX5_CAP_GEN(dev, pcam_reg)) mlx5_get_pcam_reg(dev); @@ -242,7 +245,7 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) force_state = MLX5_GET(teardown_hca_out, out, force_state); if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { - mlx5_core_err(dev, "teardown with force mode failed\n"); + mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n"); return -EIO; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index f953378bd13d..af3bb2f7a504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -56,14 +56,17 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ - mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); /* RQ size in ipoib by default is 512 */ - params->log_rq_size = is_kdump_kernel() ? + params->log_rq_mtu_frames = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; params->lro_en = false; + params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; } /* Called directly after IPoIB netdevice was created to initialize SW structs */ @@ -79,10 +82,10 @@ void mlx5i_init(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->profile = profile; priv->ppriv = ppriv; - priv->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; mutex_init(&priv->state_lock); - mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); + mlx5e_build_nic_params(mdev, &priv->channels.params, + profile->max_nch(mdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); mlx5e_timestamp_init(priv); @@ -366,25 +369,27 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); struct mlx5e_channels new_channels = {}; - int curr_mtu; + struct mlx5e_params *params; int err = 0; mutex_lock(&priv->state_lock); - curr_mtu = netdev->mtu; - netdev->mtu = new_mtu; + params = &priv->channels.params; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + params->sw_mtu = new_mtu; + netdev->mtu = params->sw_mtu; goto out; + } - new_channels.params = priv->channels.params; + new_channels.params = *params; + new_channels.params.sw_mtu = new_mtu; err = mlx5e_open_channels(priv, &new_channels); - if (err) { - netdev->mtu = curr_mtu; + if (err) goto out; - } mlx5e_switch_priv_channels(priv, &new_channels, NULL); + netdev->mtu = new_channels.params.sw_mtu; out: mutex_unlock(&priv->state_lock); @@ -538,7 +543,7 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca, err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn); if (err) - mlx5_core_dbg(mdev, "failed dettaching QPN 0x%x, MGID %pI6\n", + mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b69e9d847a6b..54a188f41f90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -290,7 +290,7 @@ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops; /* Use dummy rqs */ - priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; } /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ae391e4b7070..13b6f66310c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -58,6 +58,7 @@ #include "eswitch.h" #include "lib/mlx5.h" #include "fpga/core.h" +#include "fpga/ipsec.h" #include "accel/ipsec.h" #include "lib/clock.h" @@ -942,9 +943,9 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } - err = mlx5_init_cq_table(dev); + err = mlx5_cq_debugfs_init(dev); if (err) { - dev_err(&pdev->dev, "failed to initialize cq table\n"); + dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); goto err_eq_cleanup; } @@ -1002,7 +1003,7 @@ err_tables_cleanup: mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); + mlx5_cq_debugfs_cleanup(dev); err_eq_cleanup: mlx5_eq_cleanup(dev); @@ -1023,7 +1024,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); + mlx5_cq_debugfs_cleanup(dev); mlx5_eq_cleanup(dev); } @@ -1173,6 +1174,18 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_affinity_hints; } + err = mlx5_fpga_device_start(dev); + if (err) { + dev_err(&pdev->dev, "fpga device start failed %d\n", err); + goto err_fpga_start; + } + + err = mlx5_accel_ipsec_init(dev); + if (err) { + dev_err(&pdev->dev, "IPSec device start failed %d\n", err); + goto err_ipsec_start; + } + err = mlx5_init_fs(dev); if (err) { dev_err(&pdev->dev, "Failed to init flow steering\n"); @@ -1191,17 +1204,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } - err = mlx5_fpga_device_start(dev); - if (err) { - dev_err(&pdev->dev, "fpga device start failed %d\n", err); - goto err_fpga_start; - } - err = mlx5_accel_ipsec_init(dev); - if (err) { - dev_err(&pdev->dev, "IPSec device start failed %d\n", err); - goto err_ipsec_start; - } - if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1219,17 +1221,18 @@ out: return 0; err_reg_dev: - mlx5_accel_ipsec_cleanup(dev); -err_ipsec_start: - mlx5_fpga_device_stop(dev); - -err_fpga_start: mlx5_sriov_detach(dev); err_sriov: mlx5_cleanup_fs(dev); err_fs: + mlx5_accel_ipsec_cleanup(dev); + +err_ipsec_start: + mlx5_fpga_device_stop(dev); + +err_fpga_start: mlx5_irq_clear_affinity_hints(dev); err_affinity_hints: @@ -1296,11 +1299,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); - mlx5_accel_ipsec_cleanup(dev); - mlx5_fpga_device_stop(dev); - mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); + mlx5_accel_ipsec_cleanup(dev); + mlx5_fpga_device_stop(dev); mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); @@ -1657,6 +1659,7 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); + mlx5_fpga_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 394552f36fcf..7d001fe6e631 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -38,16 +38,11 @@ #include <linux/sched.h> #include <linux/if_link.h> #include <linux/firmware.h> +#include <linux/mlx5/cq.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" -#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) -#define MLX5_VPORT_MANAGER(mdev) \ - (MLX5_CAP_GEN(mdev, vport_group_manager) && \ - (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ - mlx5_core_is_pf(mdev)) - extern uint mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ @@ -55,6 +50,11 @@ extern uint mlx5_core_debug_mask; __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_dbg_once(__dev, format, ...) \ + dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + #define mlx5_core_dbg_mask(__dev, mask, format, ...) \ do { \ if ((mask) & mlx5_core_debug_mask) \ @@ -115,9 +115,29 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); + +int mlx5_eq_init(struct mlx5_core_dev *dev); +void mlx5_eq_cleanup(struct mlx5_core_dev *dev); +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, + enum mlx5_eq_type type); +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + u32 *out, int outlen); +int mlx5_start_eqs(struct mlx5_core_dev *dev); +void mlx5_stop_eqs(struct mlx5_core_dev *dev); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); void mlx5_cq_tasklet_cb(unsigned long data); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, u8 access_reg_group); @@ -186,4 +206,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) int mlx5_lag_allow(struct mlx5_core_dev *dev); int mlx5_lag_forbid(struct mlx5_core_dev *dev); +void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index c37d00cd472a..fa9d0760dd36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -483,6 +483,17 @@ int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); +static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out, + u32 out_size) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + + MLX5_SET(pfcc_reg, in, local_port, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + out_size, MLX5_REG_PFCC, 0, 0); +} + int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) { u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; @@ -500,13 +511,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pause); int mlx5_query_port_pause(struct mlx5_core_dev *dev, u32 *rx_pause, u32 *tx_pause) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; int err; - MLX5_SET(pfcc_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PFCC, 0, 0); + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); if (err) return err; @@ -520,6 +528,49 @@ int mlx5_query_port_pause(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_pause); +int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, + u16 stall_critical_watermark, + u16 stall_minor_watermark) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx_mask_n, 1); + MLX5_SET(pfcc_reg, in, pprx_mask_n, 1); + MLX5_SET(pfcc_reg, in, ppan_mask_n, 1); + MLX5_SET(pfcc_reg, in, critical_stall_mask, 1); + MLX5_SET(pfcc_reg, in, minor_stall_mask, 1); + MLX5_SET(pfcc_reg, in, device_stall_critical_watermark, + stall_critical_watermark); + MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} + +int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, + u16 *stall_critical_watermark, + u16 *stall_minor_watermark) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (stall_critical_watermark) + *stall_critical_watermark = MLX5_GET(pfcc_reg, out, + device_stall_critical_watermark); + + if (stall_minor_watermark) + *stall_minor_watermark = MLX5_GET(pfcc_reg, out, + device_stall_minor_watermark); + + return 0; +} + int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) { u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; @@ -538,13 +589,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; int err; - MLX5_SET(pfcc_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PFCC, 0, 0); + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 9e38343a951f..dae1c5c5d27c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -157,6 +157,31 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) } EXPORT_SYMBOL(mlx5_core_query_sq); +int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state) +{ + void *out; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_sq_out); + out = kvzalloc(inlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_sq(dev, sqn, out); + if (err) + goto out; + + sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); + *state = MLX5_GET(sqc, sqc, state); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state); + int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) { @@ -329,27 +354,6 @@ int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) -{ - u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {0}; - void *srqc; - void *xrc_srqc; - int err; - - MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); - if (!err) { - xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out, - xrc_srq_context_entry); - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); - memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); - } - - return err; -} - int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) { u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index dfe36cf6fbea..177e076b8d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1070,6 +1070,32 @@ free: } EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); +int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, + u64 *rx_discard_vport_down, + u64 *tx_discard_vport_down) +{ + u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0}; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, + MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, op_mod, 0); + MLX5_SET(query_vnic_env_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vnic_env_in, in, other_vport, 1); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.receive_discard_vport_down); + *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.transmit_discard_vport_down); + return 0; +} + int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, int vf, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 6bcfc25350f5..ea66448ba365 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -41,7 +41,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) { - return wq->sz_m1 + 1; + return wq->fbc.sz_m1 + 1; } u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) @@ -62,7 +62,7 @@ static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq) static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) { - return mlx5_cqwq_get_size(wq) << wq->log_stride; + return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride; } static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq) @@ -92,7 +92,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - wq->buf = wq_ctrl->buf.direct.buf; + wq->buf = wq_ctrl->buf.frags->buf; wq->db = wq_ctrl->db.db; wq_ctrl->mdev = mdev; @@ -130,7 +130,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - wq->rq.buf = wq_ctrl->buf.direct.buf; + wq->rq.buf = wq_ctrl->buf.frags->buf; wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq); wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; @@ -151,11 +151,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, { int err; - wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz); - wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size); - wq->sz_m1 = (1 << wq->log_sz) - 1; - wq->log_frag_strides = PAGE_SHIFT - wq->log_stride; - wq->frag_sz_m1 = (1 << wq->log_frag_strides) - 1; + mlx5_core_init_cq_frag_buf(&wq->fbc, cqc); err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -172,7 +168,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - wq->frag_buf = wq_ctrl->frag_buf; + wq->fbc.frag_buf = wq_ctrl->frag_buf; wq->db = wq_ctrl->db.db; wq_ctrl->mdev = mdev; @@ -209,7 +205,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - wq->buf = wq_ctrl->buf.direct.buf; + wq->buf = wq_ctrl->buf.frags->buf; wq->db = wq_ctrl->db.db; for (i = 0; i < wq->sz_m1; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 718589d0cec2..fca90b94596d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -45,7 +45,7 @@ struct mlx5_wq_param { struct mlx5_wq_ctrl { struct mlx5_core_dev *mdev; - struct mlx5_buf buf; + struct mlx5_frag_buf buf; struct mlx5_db db; }; @@ -68,14 +68,9 @@ struct mlx5_wq_qp { }; struct mlx5_cqwq { - struct mlx5_frag_buf frag_buf; - __be32 *db; - u32 sz_m1; - u32 frag_sz_m1; - u32 cc; /* consumer counter */ - u8 log_sz; - u8 log_stride; - u8 log_frag_strides; + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + u32 cc; /* consumer counter */ }; struct mlx5_wq_ll { @@ -131,20 +126,17 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) { - return wq->cc & wq->sz_m1; + return wq->cc & wq->fbc.sz_m1; } static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) { - unsigned int frag = (ix >> wq->log_frag_strides); - - return wq->frag_buf.frags[frag].buf + - ((wq->frag_sz_m1 & ix) << wq->log_stride); + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); } static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq) { - return wq->cc >> wq->log_sz; + return wq->cc >> wq->fbc.log_sz; } static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq) |