Diffstat (limited to 'drivers/net/ethernet/mellanox')
145 files changed, 9344 insertions, 4103 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 1b4b1f642317..825e05fb8607 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -27,6 +27,7 @@ config MLX4_EN_DCB
 config MLX4_CORE
 	tristate
 	depends on PCI
+	select AUXILIARY_BUS
 	select NET_DEVLINK
 	default n
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 0eb7b83637d8..0d8a362c2673 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -194,7 +194,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
 	mutex_unlock(&persist->device_state_mutex);
 
 	/* At that step HW was already reset, now notify clients */
-	mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+	mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, NULL);
 	mlx4_cmd_wake_completions(dev);
 	return;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c56d2194cbfc..f5b1f8c7834f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2113,7 +2113,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
 		if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
 			goto inform_slave_state;
 
-		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
+		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, &slave);
 
 		/* write the version in the event field */
 		reply |= mlx4_comm_get_version();
 
@@ -2152,7 +2152,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
 		if (mlx4_master_activate_admin_state(priv, slave))
 			goto reset_slave;
 		slave_state[slave].active = true;
-		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
+		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, &slave);
 		break;
 	case MLX4_COMM_CMD_VHCR_POST:
 		if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 7d45f1d55f79..164a13272faa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1467,8 +1467,8 @@ static int add_ip_rule(struct mlx4_en_priv *priv,
 		       struct list_head *list_h)
 {
 	int err;
-	struct mlx4_spec_list *spec_l2 = NULL;
-	struct mlx4_spec_list *spec_l3 = NULL;
+	struct mlx4_spec_list *spec_l2;
+	struct mlx4_spec_list *spec_l3;
 	struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec;
 
 	spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL);
@@ -1505,9 +1505,9 @@ static int add_tcp_udp_rule(struct mlx4_en_priv *priv,
 			    struct list_head *list_h, int proto)
 {
 	int err;
-	struct mlx4_spec_list *spec_l2 = NULL;
-	struct mlx4_spec_list *spec_l3 = NULL;
-	struct mlx4_spec_list *spec_l4 = NULL;
+	struct mlx4_spec_list *spec_l2;
+	struct mlx4_spec_list *spec_l3;
+	struct mlx4_spec_list *spec_l4;
 	struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
 
 	spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL);
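The hunks above change mlx4_dispatch_event() to take a void * payload instead of an unsigned long. For orientation, here is a minimal sketch of what a consumer of the reworked event flow might look like, based on the notifier-chain pattern this series introduces; my_mlx4_event_handler is a hypothetical name, and the per-event payload types (pointer to int, EQE pointer, or NULL) follow the call sites visible in these diffs.

#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/mlx4/driver.h>

/* Hypothetical consumer: mlx4 core events now arrive through an atomic
 * notifier chain, and param's type depends on the event (an int for
 * port/slave events, the EQE for port-management changes, NULL for
 * catastrophic errors), matching the dispatch sites changed above.
 */
static int my_mlx4_event_handler(struct notifier_block *nb,
				 unsigned long event, void *param)
{
	switch (event) {
	case MLX4_DEV_EVENT_PORT_UP:
	case MLX4_DEV_EVENT_PORT_DOWN: {
		int port = *(int *)param;	/* dispatched as &port */

		pr_debug("port %d changed state\n", port);
		break;
	}
	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
		/* param is NULL for this event */
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}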
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index f1259bdb1a29..d8f4d00ad26b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -183,24 +183,31 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 	}
 }
 
-static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
+static int mlx4_en_event(struct notifier_block *this, unsigned long event,
+			 void *param)
 {
-	struct mlx4_en_dev *endev = ctx;
-
-	return endev->pndev[port];
-}
-
-static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
-			  enum mlx4_dev_event event, unsigned long port)
-{
-	struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
+	struct mlx4_en_dev *mdev =
+		container_of(this, struct mlx4_en_dev, mlx_nb);
+	struct mlx4_dev *dev = mdev->dev;
 	struct mlx4_en_priv *priv;
+	int port;
+
+	switch (event) {
+	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
+	case MLX4_DEV_EVENT_SLAVE_INIT:
+	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+		break;
+	default:
+		port = *(int *)param;
+		break;
+	}
 
 	switch (event) {
 	case MLX4_DEV_EVENT_PORT_UP:
 	case MLX4_DEV_EVENT_PORT_DOWN:
 		if (!mdev->pndev[port])
-			return;
+			return NOTIFY_DONE;
 		priv = netdev_priv(mdev->pndev[port]);
 		/* To prevent races, we poll the link state in a separate
 		  task rather than changing it here */
@@ -212,23 +219,30 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
 			mlx4_err(mdev, "Internal error detected, restarting device\n");
 		break;
 
+	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
 	case MLX4_DEV_EVENT_SLAVE_INIT:
 	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
 		break;
 	default:
 		if (port < 1 || port > dev->caps.num_ports ||
 		    !mdev->pndev[port])
-			return;
-		mlx4_warn(mdev, "Unhandled event %d for port %d\n", event,
-			  (int) port);
+			return NOTIFY_DONE;
+		mlx4_warn(mdev, "Unhandled event %d for port %d\n", (int)event,
+			  port);
 	}
+
+	return NOTIFY_DONE;
 }
 
-static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
+static void mlx4_en_remove(struct auxiliary_device *adev)
 {
-	struct mlx4_en_dev *mdev = endev_ptr;
+	struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+	struct mlx4_dev *dev = madev->mdev;
+	struct mlx4_en_dev *mdev = auxiliary_get_drvdata(adev);
 	int i;
 
+	mlx4_unregister_event_notifier(dev, &mdev->mlx_nb);
+
 	mutex_lock(&mdev->state_lock);
 	mdev->device_up = false;
 	mutex_unlock(&mdev->state_lock);
@@ -242,52 +256,41 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
 	iounmap(mdev->uar_map);
 	mlx4_uar_free(dev, &mdev->priv_uar);
 	mlx4_pd_free(dev, mdev->priv_pdn);
-	if (mdev->nb.notifier_call)
-		unregister_netdevice_notifier(&mdev->nb);
+	if (mdev->netdev_nb.notifier_call)
+		unregister_netdevice_notifier(&mdev->netdev_nb);
 	kfree(mdev);
 }
 
-static void mlx4_en_activate(struct mlx4_dev *dev, void *ctx)
-{
-	int i;
-	struct mlx4_en_dev *mdev = ctx;
-
-	/* Create a netdev for each port */
-	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
-		mlx4_info(mdev, "Activating port:%d\n", i);
-		if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i]))
-			mdev->pndev[i] = NULL;
-	}
-
-	/* register notifier */
-	mdev->nb.notifier_call = mlx4_en_netdev_event;
-	if (register_netdevice_notifier(&mdev->nb)) {
-		mdev->nb.notifier_call = NULL;
-		mlx4_err(mdev, "Failed to create notifier\n");
-	}
-}
-
-static void *mlx4_en_add(struct mlx4_dev *dev)
+static int mlx4_en_probe(struct auxiliary_device *adev,
+			 const struct auxiliary_device_id *id)
 {
+	struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+	struct mlx4_dev *dev = madev->mdev;
 	struct mlx4_en_dev *mdev;
-	int i;
+	int err, i;
 
 	printk_once(KERN_INFO "%s", mlx4_en_version);
 
 	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
-	if (!mdev)
+	if (!mdev) {
+		err = -ENOMEM;
 		goto err_free_res;
+	}
 
-	if (mlx4_pd_alloc(dev, &mdev->priv_pdn))
+	err = mlx4_pd_alloc(dev, &mdev->priv_pdn);
+	if (err)
 		goto err_free_dev;
 
-	if (mlx4_uar_alloc(dev, &mdev->priv_uar))
+	err = mlx4_uar_alloc(dev, &mdev->priv_uar);
+	if (err)
 		goto err_pd;
 
 	mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT,
 				PAGE_SIZE);
-	if (!mdev->uar_map)
+	if (!mdev->uar_map) {
+		err = -ENOMEM;
 		goto err_uar;
+	}
 	spin_lock_init(&mdev->uar_lock);
 
 	mdev->dev = dev;
@@ -299,13 +302,15 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 	if (!mdev->LSO_support)
 		mlx4_warn(mdev, "LSO not supported, please upgrade to later FW version to enable LSO\n");
 
-	if (mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull,
-			  MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ,
-			  0, 0, &mdev->mr)) {
+	err = mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull,
+			    MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ, 0, 0,
+			    &mdev->mr);
+	if (err) {
 		mlx4_err(mdev, "Failed allocating memory region\n");
 		goto err_map;
 	}
-	if (mlx4_mr_enable(mdev->dev, &mdev->mr)) {
+	err = mlx4_mr_enable(mdev->dev, &mdev->mr);
+	if (err) {
 		mlx4_err(mdev, "Failed enabling memory region\n");
 		goto err_mr;
 	}
@@ -325,15 +330,39 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 	 * Note: we cannot use the shared workqueue because of deadlocks caused
 	 *       by the rtnl lock */
 	mdev->workqueue = create_singlethread_workqueue("mlx4_en");
-	if (!mdev->workqueue)
+	if (!mdev->workqueue) {
+		err = -ENOMEM;
 		goto err_mr;
+	}
 
 	/* At this stage all non-port specific tasks are complete:
 	 * mark the card state as up */
 	mutex_init(&mdev->state_lock);
 	mdev->device_up = true;
 
-	return mdev;
+	/* register mlx4 core notifier */
+	mdev->mlx_nb.notifier_call = mlx4_en_event;
+	err = mlx4_register_event_notifier(dev, &mdev->mlx_nb);
+	WARN(err, "failed to register mlx4 event notifier (%d)", err);
+
+	/* Setup ports */
+
+	/* Create a netdev for each port */
+	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+		mlx4_info(mdev, "Activating port:%d\n", i);
+		if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i]))
+			mdev->pndev[i] = NULL;
+	}
+
+	/* register netdev notifier */
+	mdev->netdev_nb.notifier_call = mlx4_en_netdev_event;
+	if (register_netdevice_notifier(&mdev->netdev_nb)) {
+		mdev->netdev_nb.notifier_call = NULL;
+		mlx4_err(mdev, "Failed to create netdev notifier\n");
+	}
+
+	auxiliary_set_drvdata(adev, mdev);
+	return 0;
 
 err_mr:
 	(void) mlx4_mr_free(dev, &mdev->mr);
@@ -347,16 +376,24 @@ err_pd:
 err_free_dev:
 	kfree(mdev);
 err_free_res:
-	return NULL;
+	return err;
 }
 
-static struct mlx4_interface mlx4_en_interface = {
-	.add		= mlx4_en_add,
-	.remove		= mlx4_en_remove,
-	.event		= mlx4_en_event,
-	.get_dev	= mlx4_en_get_netdev,
+static const struct auxiliary_device_id mlx4_en_id_table[] = {
+	{ .name = MLX4_ADEV_NAME ".eth" },
+	{},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx4_en_id_table);
+
+static struct mlx4_adrv mlx4_en_adrv = {
+	.adrv = {
+		.name	= "eth",
+		.probe	= mlx4_en_probe,
+		.remove	= mlx4_en_remove,
+		.id_table = mlx4_en_id_table,
+	},
 	.protocol	= MLX4_PROT_ETH,
-	.activate	= mlx4_en_activate,
 };
 
 static void mlx4_en_verify_params(void)
@@ -385,12 +422,12 @@ static int __init mlx4_en_init(void)
 	mlx4_en_verify_params();
 	mlx4_en_init_ptys2ethtool_map();
 
-	return mlx4_register_interface(&mlx4_en_interface);
+	return mlx4_register_auxiliary_driver(&mlx4_en_adrv);
 }
 
 static void __exit mlx4_en_cleanup(void)
{
-	mlx4_unregister_interface(&mlx4_en_interface);
+	mlx4_unregister_auxiliary_driver(&mlx4_en_adrv);
 }
 
 module_init(mlx4_en_init);
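The en_main.c conversion above replaces the homegrown mlx4_interface registration with the kernel's auxiliary bus. As a reference point, a stripped-down auxiliary driver might look as follows; mydrv and its callbacks are illustrative stand-ins, while the "mlx4_core.eth" match string (parent module name, a dot, then the device name) mirrors the real mlx4_en_id_table entry.

#include <linux/auxiliary_bus.h>
#include <linux/module.h>

static int mydrv_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	/* allocate per-device state and stash it on the device */
	auxiliary_set_drvdata(adev, NULL /* state */);
	return 0;
}

static void mydrv_remove(struct auxiliary_device *adev)
{
	void *state = auxiliary_get_drvdata(adev);

	/* tear down in reverse order of probe */
	(void)state;
}

static const struct auxiliary_device_id mydrv_id_table[] = {
	{ .name = "mlx4_core.eth" },	/* parent module name + "." + device name */
	{},
};
MODULE_DEVICE_TABLE(auxiliary, mydrv_id_table);

static struct auxiliary_driver mydrv = {
	.name = "eth",
	.probe = mydrv_probe,
	.remove = mydrv_remove,
	.id_table = mydrv_id_table,
};
module_auxiliary_driver(mydrv);

MODULE_LICENSE("GPL");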
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e11bc0ac880e..33bbcced8105 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -291,7 +291,7 @@ mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip,
 		     __be32 dst_ip, u8 ip_proto, __be16 src_port,
 		     __be16 dst_port, u32 flow_id)
 {
-	struct mlx4_en_filter *filter = NULL;
+	struct mlx4_en_filter *filter;
 
 	filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC);
 	if (!filter)
@@ -2894,63 +2894,6 @@ static const struct xdp_metadata_ops mlx4_xdp_metadata_ops = {
 	.xmo_rx_hash			= mlx4_en_xdp_rx_hash,
 };
 
-struct mlx4_en_bond {
-	struct work_struct work;
-	struct mlx4_en_priv *priv;
-	int is_bonded;
-	struct mlx4_port_map port_map;
-};
-
-static void mlx4_en_bond_work(struct work_struct *work)
-{
-	struct mlx4_en_bond *bond = container_of(work,
-						 struct mlx4_en_bond,
-						 work);
-	int err = 0;
-	struct mlx4_dev *dev = bond->priv->mdev->dev;
-
-	if (bond->is_bonded) {
-		if (!mlx4_is_bonded(dev)) {
-			err = mlx4_bond(dev);
-			if (err)
-				en_err(bond->priv, "Fail to bond device\n");
-		}
-		if (!err) {
-			err = mlx4_port_map_set(dev, &bond->port_map);
-			if (err)
-				en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n",
-				       bond->port_map.port1,
-				       bond->port_map.port2,
-				       err);
-		}
-	} else if (mlx4_is_bonded(dev)) {
-		err = mlx4_unbond(dev);
-		if (err)
-			en_err(bond->priv, "Fail to unbond device\n");
-	}
-	dev_put(bond->priv->dev);
-	kfree(bond);
-}
-
-static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded,
-				   u8 v2p_p1, u8 v2p_p2)
-{
-	struct mlx4_en_bond *bond = NULL;
-
-	bond = kzalloc(sizeof(*bond), GFP_ATOMIC);
-	if (!bond)
-		return -ENOMEM;
-
-	INIT_WORK(&bond->work, mlx4_en_bond_work);
-	bond->priv = priv;
-	bond->is_bonded = is_bonded;
-	bond->port_map.port1 = v2p_p1;
-	bond->port_map.port2 = v2p_p2;
-	dev_hold(priv->dev);
-	queue_work(priv->mdev->workqueue, &bond->work);
-	return 0;
-}
-
 int mlx4_en_netdev_event(struct notifier_block *this,
 			 unsigned long event, void *ptr)
 {
@@ -2960,14 +2903,13 @@ int mlx4_en_netdev_event(struct notifier_block *this,
 	struct mlx4_dev *dev;
 	int i, num_eth_ports = 0;
 	bool do_bond = true;
-	struct mlx4_en_priv *priv;
 	u8 v2p_port1 = 0;
 	u8 v2p_port2 = 0;
 
 	if (!net_eq(dev_net(ndev), &init_net))
 		return NOTIFY_DONE;
 
-	mdev = container_of(this, struct mlx4_en_dev, nb);
+	mdev = container_of(this, struct mlx4_en_dev, netdev_nb);
 	dev = mdev->dev;
 
 	/* Go into this mode only when two network devices set on two ports
@@ -2995,7 +2937,6 @@ int mlx4_en_netdev_event(struct notifier_block *this,
 	if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port)
 		return NOTIFY_DONE;
 
-	priv = netdev_priv(ndev);
 	if (do_bond) {
 		struct netdev_notifier_bonding_info *notifier_info = ptr;
 		struct netdev_bonding_info *bonding_info =
@@ -3062,8 +3003,7 @@ int mlx4_en_netdev_event(struct notifier_block *this,
 		}
 	}
 
-	mlx4_en_queue_bond_work(priv, do_bond,
-				v2p_port1, v2p_port2);
+	mlx4_queue_bond_work(dev, do_bond, v2p_port1, v2p_port2);
 
 	return NOTIFY_DONE;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 414e390e6b48..6598b10a9ff4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -501,7 +501,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	int port;
 	int slave = 0;
 	int ret;
-	u32 flr_slave;
+	int flr_slave;
 	u8 update_slave_state;
 	int i;
 	enum slave_port_gen_event gen_event;
@@ -606,8 +606,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 			port = be32_to_cpu(eqe->event.port_change.port) >> 28;
 			slaves_port = mlx4_phys_to_slaves_pport(dev, port);
 			if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) {
-				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
-						    port);
+				mlx4_dispatch_event(
+					dev, MLX4_DEV_EVENT_PORT_DOWN, &port);
 				mlx4_priv(dev)->sense.do_sense_port[port] = 1;
 				if (!mlx4_is_master(dev))
 					break;
@@ -647,7 +647,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 					}
 				}
 			} else {
-				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);
+				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP,
+						    &port);
 
 				mlx4_priv(dev)->sense.do_sense_port[port] = 0;
 
@@ -758,7 +759,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 			}
 			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
 			mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
-					    flr_slave);
+					    &flr_slave);
 			queue_work(priv->mfunc.master.comm_wq,
 				   &priv->mfunc.master.slave_flr_event_work);
 			break;
@@ -787,8 +788,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 			break;
 
 		case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT:
-			mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
-					    (unsigned long) eqe);
+			mlx4_dispatch_event(
+				dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, eqe);
 			break;
 
 		case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT:
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 65482f004e50..a371b970ac1e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -38,102 +38,131 @@
 
 #include "mlx4.h"
 
-struct mlx4_device_context {
-	struct list_head	list;
-	struct list_head	bond_list;
-	struct mlx4_interface  *intf;
-	void		       *context;
-};
-
-static LIST_HEAD(intf_list);
-static LIST_HEAD(dev_list);
 static DEFINE_MUTEX(intf_mutex);
+static DEFINE_IDA(mlx4_adev_ida);
 
-static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
+static bool is_eth_supported(struct mlx4_dev *dev)
 {
-	struct mlx4_device_context *dev_ctx;
+	for (int port = 1; port <= dev->caps.num_ports; port++)
+		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+			return true;
 
-	dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
-	if (!dev_ctx)
-		return;
+	return false;
+}
 
-	dev_ctx->intf    = intf;
-	dev_ctx->context = intf->add(&priv->dev);
+static bool is_ib_supported(struct mlx4_dev *dev)
+{
+	for (int port = 1; port <= dev->caps.num_ports; port++)
+		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
+			return true;
 
-	if (dev_ctx->context) {
-		spin_lock_irq(&priv->ctx_lock);
-		list_add_tail(&dev_ctx->list, &priv->ctx_list);
-		spin_unlock_irq(&priv->ctx_lock);
-		if (intf->activate)
-			intf->activate(&priv->dev, dev_ctx->context);
-	} else
-		kfree(dev_ctx);
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+		return true;
+
+	return false;
 }
 
-static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
+static const struct mlx4_adev_device {
+	const char *suffix;
+	bool (*is_supported)(struct mlx4_dev *dev);
+} mlx4_adev_devices[] = {
+	{ "eth", is_eth_supported },
+	{ "ib", is_ib_supported },
+};
+
+int mlx4_adev_init(struct mlx4_dev *dev)
 {
-	struct mlx4_device_context *dev_ctx;
+	struct mlx4_priv *priv = mlx4_priv(dev);
 
-	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf == intf) {
-			spin_lock_irq(&priv->ctx_lock);
-			list_del(&dev_ctx->list);
-			spin_unlock_irq(&priv->ctx_lock);
+	priv->adev_idx = ida_alloc(&mlx4_adev_ida, GFP_KERNEL);
+	if (priv->adev_idx < 0)
+		return priv->adev_idx;
 
-			intf->remove(&priv->dev, dev_ctx->context);
-			kfree(dev_ctx);
-			return;
-		}
+	priv->adev = kcalloc(ARRAY_SIZE(mlx4_adev_devices),
+			     sizeof(struct mlx4_adev *), GFP_KERNEL);
+	if (!priv->adev) {
+		ida_free(&mlx4_adev_ida, priv->adev_idx);
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
-int mlx4_register_interface(struct mlx4_interface *intf)
+void mlx4_adev_cleanup(struct mlx4_dev *dev)
 {
-	struct mlx4_priv *priv;
-
-	if (!intf->add || !intf->remove)
-		return -EINVAL;
-
-	mutex_lock(&intf_mutex);
+	struct mlx4_priv *priv = mlx4_priv(dev);
 
-	list_add_tail(&intf->list, &intf_list);
-	list_for_each_entry(priv, &dev_list, dev_list) {
-		if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) {
-			mlx4_dbg(&priv->dev,
-				 "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol);
-			intf->flags &= ~MLX4_INTFF_BONDING;
-		}
-		mlx4_add_device(intf, priv);
-	}
+	kfree(priv->adev);
+	ida_free(&mlx4_adev_ida, priv->adev_idx);
+}
 
-	mutex_unlock(&intf_mutex);
+static void adev_release(struct device *dev)
+{
+	struct mlx4_adev *mlx4_adev =
+		container_of(dev, struct mlx4_adev, adev.dev);
+	struct mlx4_priv *priv = mlx4_priv(mlx4_adev->mdev);
+	int idx = mlx4_adev->idx;
 
-	return 0;
+	kfree(mlx4_adev);
+	priv->adev[idx] = NULL;
 }
-EXPORT_SYMBOL_GPL(mlx4_register_interface);
 
-void mlx4_unregister_interface(struct mlx4_interface *intf)
+static struct mlx4_adev *add_adev(struct mlx4_dev *dev, int idx)
 {
-	struct mlx4_priv *priv;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	const char *suffix = mlx4_adev_devices[idx].suffix;
+	struct auxiliary_device *adev;
+	struct mlx4_adev *madev;
+	int ret;
 
-	mutex_lock(&intf_mutex);
+	madev = kzalloc(sizeof(*madev), GFP_KERNEL);
+	if (!madev)
+		return ERR_PTR(-ENOMEM);
 
-	list_for_each_entry(priv, &dev_list, dev_list)
-		mlx4_remove_device(intf, priv);
+	adev = &madev->adev;
+	adev->id = priv->adev_idx;
+	adev->name = suffix;
+	adev->dev.parent = &dev->persist->pdev->dev;
+	adev->dev.release = adev_release;
+	madev->mdev = dev;
+	madev->idx = idx;
 
-	list_del(&intf->list);
+	ret = auxiliary_device_init(adev);
+	if (ret) {
+		kfree(madev);
+		return ERR_PTR(ret);
+	}
 
-	mutex_unlock(&intf_mutex);
+	ret = auxiliary_device_add(adev);
+	if (ret) {
+		auxiliary_device_uninit(adev);
+		return ERR_PTR(ret);
+	}
+	return madev;
+}
+
+static void del_adev(struct auxiliary_device *adev)
+{
+	auxiliary_device_delete(adev);
+	auxiliary_device_uninit(adev);
+}
+
+int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv)
+{
+	return auxiliary_driver_register(&madrv->adrv);
+}
+EXPORT_SYMBOL_GPL(mlx4_register_auxiliary_driver);
+
+void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv)
+{
+	auxiliary_driver_unregister(&madrv->adrv);
 }
-EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
+EXPORT_SYMBOL_GPL(mlx4_unregister_auxiliary_driver);
 
 int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx;
-	unsigned long flags;
-	int ret;
-	LIST_HEAD(bond_list);
+	int i, ret;
 
 	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
 		return -EOPNOTSUPP;
@@ -155,69 +184,178 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
 		dev->flags &= ~MLX4_FLAG_BONDED;
 	}
 
-	spin_lock_irqsave(&priv->ctx_lock, flags);
-	list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) {
-		if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) {
-			list_add_tail(&dev_ctx->bond_list, &bond_list);
-			list_del(&dev_ctx->list);
+	mutex_lock(&intf_mutex);
+
+	for (i = 0; i < ARRAY_SIZE(mlx4_adev_devices); i++) {
+		struct mlx4_adev *madev = priv->adev[i];
+		struct mlx4_adrv *madrv;
+		enum mlx4_protocol protocol;
+
+		if (!madev)
+			continue;
+
+		device_lock(&madev->adev.dev);
+		if (!madev->adev.dev.driver) {
+			device_unlock(&madev->adev.dev);
+			continue;
+		}
+
+		madrv = container_of(madev->adev.dev.driver, struct mlx4_adrv,
+				     adrv.driver);
+		if (!(madrv->flags & MLX4_INTFF_BONDING)) {
+			device_unlock(&madev->adev.dev);
+			continue;
+		}
+
+		if (mlx4_is_mfunc(dev)) {
+			mlx4_dbg(dev,
+				 "SRIOV, disabled HA mode for intf proto %d\n",
+				 madrv->protocol);
+			device_unlock(&madev->adev.dev);
+			continue;
 		}
-	}
-	spin_unlock_irqrestore(&priv->ctx_lock, flags);
 
-	list_for_each_entry(dev_ctx, &bond_list, bond_list) {
-		dev_ctx->intf->remove(dev, dev_ctx->context);
-		dev_ctx->context =  dev_ctx->intf->add(dev);
+		protocol = madrv->protocol;
+		device_unlock(&madev->adev.dev);
 
-		spin_lock_irqsave(&priv->ctx_lock, flags);
-		list_add_tail(&dev_ctx->list, &priv->ctx_list);
-		spin_unlock_irqrestore(&priv->ctx_lock, flags);
+		del_adev(&madev->adev);
+		priv->adev[i] = add_adev(dev, i);
+		if (IS_ERR(priv->adev[i])) {
+			mlx4_warn(dev, "Device[%d] (%s) failed to load\n", i,
+				  mlx4_adev_devices[i].suffix);
+			priv->adev[i] = NULL;
+			continue;
+		}
 
-		mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n",
-			 dev_ctx->intf->protocol, enable ?
-			 "enabled" : "disabled");
+		mlx4_dbg(dev,
+			 "Interface for protocol %d restarted with bonded mode %s\n",
+			 protocol, enable ? "enabled" : "disabled");
 	}
+
+	mutex_unlock(&intf_mutex);
+
 	return 0;
 }
 
 void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
-			 unsigned long param)
+			 void *param)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	atomic_notifier_call_chain(&priv->event_nh, type, param);
+}
+
+int mlx4_register_event_notifier(struct mlx4_dev *dev,
+				 struct notifier_block *nb)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct mlx4_device_context *dev_ctx;
-	unsigned long flags;
 
-	spin_lock_irqsave(&priv->ctx_lock, flags);
+	return atomic_notifier_chain_register(&priv->event_nh, nb);
+}
+EXPORT_SYMBOL(mlx4_register_event_notifier);
 
-	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf->event)
-			dev_ctx->intf->event(dev, dev_ctx->context, type, param);
+int mlx4_unregister_event_notifier(struct mlx4_dev *dev,
+				   struct notifier_block *nb)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
 
-	spin_unlock_irqrestore(&priv->ctx_lock, flags);
+	return atomic_notifier_chain_unregister(&priv->event_nh, nb);
 }
+EXPORT_SYMBOL(mlx4_unregister_event_notifier);
 
-int mlx4_register_device(struct mlx4_dev *dev)
+static int add_drivers(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int i, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(mlx4_adev_devices); i++) {
+		bool is_supported = false;
+
+		if (priv->adev[i])
+			continue;
+
+		if (mlx4_adev_devices[i].is_supported)
+			is_supported = mlx4_adev_devices[i].is_supported(dev);
+
+		if (!is_supported)
+			continue;
+
+		priv->adev[i] = add_adev(dev, i);
+		if (IS_ERR(priv->adev[i])) {
+			mlx4_warn(dev, "Device[%d] (%s) failed to load\n", i,
+				  mlx4_adev_devices[i].suffix);
+			/* We continue to rescan drivers and leave to the caller
+			 * to make decision if to release everything or
+			 * continue. */
+			ret = PTR_ERR(priv->adev[i]);
+			priv->adev[i] = NULL;
+		}
+	}
+	return ret;
+}
+
+static void delete_drivers(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct mlx4_interface *intf;
+	bool delete_all;
+	int i;
+
+	delete_all = !(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP);
+
+	for (i = ARRAY_SIZE(mlx4_adev_devices) - 1; i >= 0; i--) {
+		bool is_supported = false;
+
+		if (!priv->adev[i])
+			continue;
+
+		if (mlx4_adev_devices[i].is_supported && !delete_all)
+			is_supported = mlx4_adev_devices[i].is_supported(dev);
+
+		if (is_supported)
+			continue;
+
+		del_adev(&priv->adev[i]->adev);
+		priv->adev[i] = NULL;
+	}
+}
+
+/* This function is used after mlx4_dev is reconfigured.
+ */
+static int rescan_drivers_locked(struct mlx4_dev *dev)
+{
+	lockdep_assert_held(&intf_mutex);
+
+	delete_drivers(dev);
+	if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP))
+		return 0;
+
+	return add_drivers(dev);
+}
+
+int mlx4_register_device(struct mlx4_dev *dev)
+{
+	int ret;
 
 	mutex_lock(&intf_mutex);
 
 	dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP;
-	list_add_tail(&priv->dev_list, &dev_list);
-	list_for_each_entry(intf, &intf_list, list)
-		mlx4_add_device(intf, priv);
+
+	ret = rescan_drivers_locked(dev);
 
 	mutex_unlock(&intf_mutex);
+
+	if (ret) {
+		mlx4_unregister_device(dev);
+		return ret;
+	}
+
 	mlx4_start_catas_poll(dev);
 
-	return 0;
+	return ret;
 }
 
 void mlx4_unregister_device(struct mlx4_dev *dev)
 {
-	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct mlx4_interface *intf;
-
 	if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP))
 		return;
 
@@ -236,35 +374,12 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
 	}
 
 	mutex_lock(&intf_mutex);
-	list_for_each_entry(intf, &intf_list, list)
-		mlx4_remove_device(intf, priv);
-
-	list_del(&priv->dev_list);
 	dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
-	mutex_unlock(&intf_mutex);
-}
-
-void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port)
-{
-	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct mlx4_device_context *dev_ctx;
-	unsigned long flags;
-	void *result = NULL;
 
-	spin_lock_irqsave(&priv->ctx_lock, flags);
-
-	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_dev) {
-			result = dev_ctx->intf->get_dev(dev, dev_ctx->context, port);
-			break;
-		}
+	rescan_drivers_locked(dev);
 
-	spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
-	return result;
+	mutex_unlock(&intf_mutex);
 }
-EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev);
 
 struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port)
 {
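add_adev() above follows the standard two-step auxiliary device registration. A condensed sketch of that lifecycle, with hypothetical names, is shown below; the key detail, visible in the hunk as well, is that once auxiliary_device_init() has succeeded, teardown must go through auxiliary_device_uninit() so the release callback frees the wrapper.

#include <linux/auxiliary_bus.h>
#include <linux/err.h>
#include <linux/slab.h>

struct my_adev {		/* illustrative wrapper, like struct mlx4_adev */
	struct auxiliary_device adev;
};

static void my_adev_release(struct device *dev)
{
	struct my_adev *madev = container_of(dev, struct my_adev, adev.dev);

	kfree(madev);		/* freed only once the last reference drops */
}

static struct auxiliary_device *my_add_adev(struct device *parent, int id)
{
	struct my_adev *madev = kzalloc(sizeof(*madev), GFP_KERNEL);
	int ret;

	if (!madev)
		return ERR_PTR(-ENOMEM);

	madev->adev.name = "eth";
	madev->adev.id = id;
	madev->adev.dev.parent = parent;
	madev->adev.dev.release = my_adev_release;

	ret = auxiliary_device_init(&madev->adev);
	if (ret) {
		kfree(madev);	/* release() not armed yet: free directly */
		return ERR_PTR(ret);
	}

	ret = auxiliary_device_add(&madev->adev);
	if (ret) {
		/* from here on, teardown goes through uninit -> release() */
		auxiliary_device_uninit(&madev->adev);
		return ERR_PTR(ret);
	}
	return &madev->adev;
}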
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 61286b0d9b0c..2581226836b5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -42,7 +42,6 @@
 #include <linux/slab.h>
 #include <linux/io-mapping.h>
 #include <linux/delay.h>
-#include <linux/kmod.h>
 #include <linux/etherdevice.h>
 #include <net/devlink.h>
 
@@ -864,7 +863,7 @@ static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
 
 static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
 {
-	struct mlx4_func_cap *func_cap = NULL;
+	struct mlx4_func_cap *func_cap;
 	struct mlx4_caps *caps = &dev->caps;
 	int i, err = 0;
 
@@ -908,9 +907,9 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 {
 	int err;
 	u32 page_size;
-	struct mlx4_dev_cap *dev_cap = NULL;
-	struct mlx4_func_cap *func_cap = NULL;
-	struct mlx4_init_hca_param *hca_param = NULL;
+	struct mlx4_dev_cap *dev_cap;
+	struct mlx4_func_cap *func_cap;
+	struct mlx4_init_hca_param *hca_param;
 
 	hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
 	func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
@@ -1091,27 +1090,6 @@ free_mem:
 	return err;
 }
 
-static void mlx4_request_modules(struct mlx4_dev *dev)
-{
-	int port;
-	int has_ib_port = false;
-	int has_eth_port = false;
-#define EN_DRV_NAME	"mlx4_en"
-#define IB_DRV_NAME	"mlx4_ib"
-
-	for (port = 1; port <= dev->caps.num_ports; port++) {
-		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
-			has_ib_port = true;
-		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
-			has_eth_port = true;
-	}
-
-	if (has_eth_port)
-		request_module_nowait(EN_DRV_NAME);
-	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
-		request_module_nowait(IB_DRV_NAME);
-}
-
 /*
  * Change the port configuration of the device.
  * Every user of this function must hold the port mutex.
@@ -1147,7 +1125,6 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
 			mlx4_err(dev, "Failed to register device\n");
 			goto out;
 		}
-		mlx4_request_modules(dev);
 	}
 
 out:
@@ -1441,7 +1418,7 @@ static int mlx4_mf_unbond(struct mlx4_dev *dev)
 	return ret;
 }
 
-int mlx4_bond(struct mlx4_dev *dev)
+static int mlx4_bond(struct mlx4_dev *dev)
 {
 	int ret = 0;
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1467,9 +1444,8 @@ int mlx4_bond(struct mlx4_dev *dev)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(mlx4_bond);
 
-int mlx4_unbond(struct mlx4_dev *dev)
+static int mlx4_unbond(struct mlx4_dev *dev)
 {
 	int ret = 0;
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1496,10 +1472,8 @@ int mlx4_unbond(struct mlx4_dev *dev)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(mlx4_unbond);
-
-int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
+static int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
 {
 	u8 port1 = v2p->port1;
 	u8 port2 = v2p->port2;
@@ -1541,7 +1515,61 @@ int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
 	mutex_unlock(&priv->bond_mutex);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx4_port_map_set);
+
+struct mlx4_bond {
+	struct work_struct work;
+	struct mlx4_dev *dev;
+	int is_bonded;
+	struct mlx4_port_map port_map;
+};
+
+static void mlx4_bond_work(struct work_struct *work)
+{
+	struct mlx4_bond *bond = container_of(work, struct mlx4_bond, work);
+	int err = 0;
+
+	if (bond->is_bonded) {
+		if (!mlx4_is_bonded(bond->dev)) {
+			err = mlx4_bond(bond->dev);
+			if (err)
+				mlx4_err(bond->dev, "Fail to bond device\n");
+		}
+		if (!err) {
+			err = mlx4_port_map_set(bond->dev, &bond->port_map);
+			if (err)
+				mlx4_err(bond->dev,
+					 "Fail to set port map [%d][%d]: %d\n",
+					 bond->port_map.port1,
+					 bond->port_map.port2, err);
+		}
+	} else if (mlx4_is_bonded(bond->dev)) {
+		err = mlx4_unbond(bond->dev);
+		if (err)
+			mlx4_err(bond->dev, "Fail to unbond device\n");
+	}
+	put_device(&bond->dev->persist->pdev->dev);
+	kfree(bond);
+}
+
+int mlx4_queue_bond_work(struct mlx4_dev *dev, int is_bonded, u8 v2p_p1,
+			 u8 v2p_p2)
+{
+	struct mlx4_bond *bond;
+
+	bond = kzalloc(sizeof(*bond), GFP_ATOMIC);
+	if (!bond)
+		return -ENOMEM;
+
+	INIT_WORK(&bond->work, mlx4_bond_work);
+	get_device(&dev->persist->pdev->dev);
+	bond->dev = dev;
+	bond->is_bonded = is_bonded;
+	bond->port_map.port1 = v2p_p1;
+	bond->port_map.port2 = v2p_p2;
+	queue_work(mlx4_wq, &bond->work);
+	return 0;
+}
+EXPORT_SYMBOL(mlx4_queue_bond_work);
 
 static int mlx4_load_fw(struct mlx4_dev *dev)
 {
@@ -3375,8 +3403,11 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 	devl_assert_locked(devlink);
 	dev = &priv->dev;
 
-	INIT_LIST_HEAD(&priv->ctx_list);
-	spin_lock_init(&priv->ctx_lock);
+	err = mlx4_adev_init(dev);
+	if (err)
+		return err;
+
+	ATOMIC_INIT_NOTIFIER_HEAD(&priv->event_nh);
 
 	mutex_init(&priv->port_mutex);
 	mutex_init(&priv->bond_mutex);
@@ -3402,10 +3433,11 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 	err = mlx4_get_ownership(dev);
 	if (err) {
 		if (err < 0)
-			return err;
+			goto err_adev;
 		else {
 			mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
-			return -EINVAL;
+			err = -EINVAL;
+			goto err_adev;
 		}
 	}
 
@@ -3674,8 +3706,6 @@ slave_start:
 	if (err)
 		goto err_port;
 
-	mlx4_request_modules(dev);
-
 	mlx4_sense_init(dev);
 	mlx4_start_sense(dev);
 
@@ -3753,6 +3783,9 @@ err_sriov:
 	mlx4_free_ownership(dev);
 
 	kfree(dev_cap);
+
+err_adev:
+	mlx4_adev_cleanup(dev);
 	return err;
 }
 
@@ -4133,6 +4166,8 @@ static void mlx4_unload_one(struct pci_dev *pdev)
 	mlx4_slave_destroy_special_qp_cap(dev);
 	kfree(dev->dev_vfs);
 
+	mlx4_adev_cleanup(dev);
+
 	mlx4_clean_dev(dev);
 	priv->pci_dev_data = pci_dev_data;
 	priv->removed = 1;
@@ -4520,6 +4555,9 @@ static int __init mlx4_init(void)
 {
 	int ret;
 
+	WARN_ONCE(strcmp(MLX4_ADEV_NAME, KBUILD_MODNAME),
+		  "mlx4_core name not in sync with kernel module name");
+
 	if (mlx4_verify_params())
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index f1716a83a4d3..24d0c7c46878 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -294,7 +294,7 @@ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port,
 	struct mlx4_promisc_qp *dqp, *tmp_dqp;
 
 	if (port < 1 || port > dev->caps.num_ports)
-		return NULL;
+		return false;
 
 	s_steer = &mlx4_priv(dev)->steer[port - 1];
 
@@ -375,7 +375,7 @@ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port,
 	bool ret = false;
 
 	if (port < 1 || port > dev->caps.num_ports)
-		return NULL;
+		return false;
 
 	s_steer = &mlx4_priv(dev)->steer[port - 1];
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 6ccf340660d9..d7d856d1758a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -47,6 +47,8 @@
 #include <linux/spinlock.h>
 #include <net/devlink.h>
 #include <linux/rwsem.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/notifier.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/driver.h>
@@ -862,6 +864,11 @@ struct mlx4_steer {
 	struct list_head steer_entries[MLX4_NUM_STEERS];
 };
 
+struct mlx4_port_map {
+	u8	port1;
+	u8	port2;
+};
+
 enum {
 	MLX4_PCI_DEV_IS_VF		= 1 << 0,
 	MLX4_PCI_DEV_FORCE_SENSE_PORT	= 1 << 1,
@@ -875,9 +882,9 @@ enum {
 struct mlx4_priv {
 	struct mlx4_dev		dev;
 
-	struct list_head	dev_list;
-	struct list_head	ctx_list;
-	spinlock_t		ctx_lock;
+	struct mlx4_adev	**adev;
+	int			adev_idx;
+	struct atomic_notifier_head	event_nh;
 
 	int			pci_dev_data;
 	int			removed;
@@ -1045,10 +1052,13 @@ void mlx4_catas_end(struct mlx4_dev *dev);
 int mlx4_crdump_init(struct mlx4_dev *dev);
 void mlx4_crdump_end(struct mlx4_dev *dev);
 int mlx4_restart_one(struct pci_dev *pdev);
+
+int mlx4_adev_init(struct mlx4_dev *dev);
+void mlx4_adev_cleanup(struct mlx4_dev *dev);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
 void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
-			 unsigned long param);
+			 void *param);
 
 struct mlx4_dev_cap;
 struct mlx4_init_hca_param;
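mlx4_queue_bond_work(), added to main.c above, defers bond reconfiguration to a workqueue and pins the underlying PCI device with get_device()/put_device() rather than holding a netdev reference as the removed mlx4_en code did. A generic sketch of that pattern, with illustrative names:

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct deferred_ctx {
	struct work_struct work;
	struct device *dev;	/* pinned while the work is in flight */
};

static void deferred_fn(struct work_struct *work)
{
	struct deferred_ctx *ctx = container_of(work, struct deferred_ctx, work);

	/* ... perform the slow reconfiguration here ... */

	put_device(ctx->dev);	/* balances get_device() below */
	kfree(ctx);
}

static int queue_deferred(struct device *dev)
{
	struct deferred_ctx *ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);

	if (!ctx)
		return -ENOMEM;

	INIT_WORK(&ctx->work, deferred_fn);
	ctx->dev = get_device(dev);	/* keep the device alive until the work runs */
	schedule_work(&ctx->work);
	return 0;
}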
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 321f801c1d7c..efe3f97b874f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -49,6 +49,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/irq.h>
 #include <net/xdp.h>
+#include <linux/notifier.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/qp.h>
@@ -432,7 +433,8 @@ struct mlx4_en_dev {
 	unsigned long		last_overflow_check;
 	struct ptp_clock	*ptp_clock;
 	struct ptp_clock_info	ptp_clock_info;
-	struct notifier_block	nb;
+	struct notifier_block	netdev_nb;
+	struct notifier_block	mlx_nb;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index bb1d7b039a7e..c4f4de82e29e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -12,6 +12,7 @@ config MLX5_CORE
 	depends on MLXFW || !MLXFW
 	depends on PTP_1588_CLOCK_OPTIONAL
 	depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE
+	depends on HWMON || !HWMON
 	help
 	  Core driver for low level functionality of the ConnectX-4 and
 	  Connect-IB cards by Mellanox Technologies.
@@ -139,7 +140,7 @@ config MLX5_CORE_IPOIB
 	help
 	  MLX5 IPoIB offloads & acceleration support.
 
-config MLX5_EN_MACSEC
+config MLX5_MACSEC
 	bool "Connect-X support for MACSec offload"
 	depends on MLX5_CORE_EN
 	depends on MACSEC
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 35f00700a4d6..7e94caca4888 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -69,16 +69,20 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE)   += en/tc/sample.o
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
 				      ecpf.o rdma.o esw/legacy.o \
-				      esw/devlink_port.o esw/vporttbl.o esw/qos.o
+				      esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
 				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
 
+ifneq ($(CONFIG_MLX5_EN_IPSEC),)
+	mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/ipsec_fs.o
+endif
+
 mlx5_core-$(CONFIG_MLX5_BRIDGE)    += esw/bridge.o esw/bridge_mcast.o esw/bridge_debugfs.o \
 				      en/rep/bridge.o
 
-mlx5_core-$(CONFIG_THERMAL) += thermal.o
+mlx5_core-$(CONFIG_HWMON) += hwmon.o
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
@@ -94,7 +98,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
 #
 mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
 
-mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \
+mlx5_core-$(CONFIG_MLX5_MACSEC) += en_accel/macsec.o lib/macsec_fs.o \
 				      en_accel/macsec_stats.o
 
 mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d532883b42d7..afb348579577 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -162,18 +162,18 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd)
 	int ret;
 
 	spin_lock_irqsave(&cmd->alloc_lock, flags);
-	ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds);
-	if (ret < cmd->max_reg_cmds)
-		clear_bit(ret, &cmd->bitmask);
+	ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds);
+	if (ret < cmd->vars.max_reg_cmds)
+		clear_bit(ret, &cmd->vars.bitmask);
 	spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 
-	return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
+	return ret < cmd->vars.max_reg_cmds ? ret : -ENOMEM;
 }
 
 static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
 {
 	lockdep_assert_held(&cmd->alloc_lock);
-	set_bit(idx, &cmd->bitmask);
+	set_bit(idx, &cmd->vars.bitmask);
 }
 
 static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
@@ -192,7 +192,7 @@ static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
 
 	if (ent->idx >= 0) {
 		cmd_free_index(cmd, ent->idx);
-		up(ent->page_queue ? &cmd->pages_sem : &cmd->sem);
+		up(ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem);
 	}
 
 	cmd_free_ent(ent);
@@ -202,7 +202,7 @@ out:
 
 static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
 {
-	return cmd->cmd_buf + (idx << cmd->log_stride);
+	return cmd->cmd_buf + (idx << cmd->vars.log_stride);
 }
 
 static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg)
@@ -974,7 +974,7 @@ static void cmd_work_handler(struct work_struct *work)
 	cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
 
 	complete(&ent->handling);
-	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+	sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem;
 	down(sem);
 	if (!ent->page_queue) {
 		alloc_ret = cmd_alloc_index(cmd);
@@ -994,9 +994,9 @@ static void cmd_work_handler(struct work_struct *work)
 		}
 		ent->idx = alloc_ret;
 	} else {
-		ent->idx = cmd->max_reg_cmds;
+		ent->idx = cmd->vars.max_reg_cmds;
 		spin_lock_irqsave(&cmd->alloc_lock, flags);
-		clear_bit(ent->idx, &cmd->bitmask);
+		clear_bit(ent->idx, &cmd->vars.bitmask);
 		spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 	}
 
@@ -1225,8 +1225,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 		goto out_free;
 
 	ds = ent->ts2 - ent->ts1;
-	if (ent->op < MLX5_CMD_OP_MAX) {
-		stats = &cmd->stats[ent->op];
+	stats = xa_load(&cmd->stats, ent->op);
+	if (stats) {
 		spin_lock_irq(&stats->lock);
 		stats->sum += ds;
 		++stats->n;
@@ -1548,7 +1548,6 @@ static void clean_debug_files(struct mlx5_core_dev *dev)
 	if (!mlx5_debugfs_root)
 		return;
 
-	mlx5_cmdif_debugfs_cleanup(dev);
 	debugfs_remove_recursive(dbg->dbg_root);
 }
 
@@ -1563,8 +1562,6 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
 	debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops);
 	debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status);
 	debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
-
-	mlx5_cmdif_debugfs_init(dev);
 }
 
 void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
@@ -1572,15 +1569,15 @@ void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
 	struct mlx5_cmd *cmd = &dev->cmd;
 	int i;
 
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		down(&cmd->sem);
-	down(&cmd->pages_sem);
+	for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+		down(&cmd->vars.sem);
+	down(&cmd->vars.pages_sem);
 
 	cmd->allowed_opcode = opcode;
 
-	up(&cmd->pages_sem);
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		up(&cmd->sem);
+	up(&cmd->vars.pages_sem);
+	for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+		up(&cmd->vars.sem);
 }
 
 static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
@@ -1588,15 +1585,15 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
 	struct mlx5_cmd *cmd = &dev->cmd;
 	int i;
 
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		down(&cmd->sem);
-	down(&cmd->pages_sem);
+	for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+		down(&cmd->vars.sem);
+	down(&cmd->vars.pages_sem);
 
 	cmd->mode = mode;
 
-	up(&cmd->pages_sem);
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		up(&cmd->sem);
+	up(&cmd->vars.pages_sem);
+	for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+		up(&cmd->vars.sem);
 }
 
 static int cmd_comp_notifier(struct notifier_block *nb,
@@ -1655,7 +1652,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
 
 	/* there can be at most 32 command queues */
 	vector = vec & 0xffffffff;
-	for (i = 0; i < (1 << cmd->log_sz); i++) {
+	for (i = 0; i < (1 << cmd->vars.log_sz); i++) {
 		if (test_bit(i, &vector)) {
 			ent = cmd->ent_arr[i];
@@ -1698,8 +1695,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
 			if (ent->callback) {
 				ds = ent->ts2 - ent->ts1;
-				if (ent->op < MLX5_CMD_OP_MAX) {
-					stats = &cmd->stats[ent->op];
+				stats = xa_load(&cmd->stats, ent->op);
+				if (stats) {
 					spin_lock_irqsave(&stats->lock, flags);
 					stats->sum += ds;
 					++stats->n;
@@ -1744,7 +1741,7 @@ static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
 	/* wait for pending handlers to complete */
 	mlx5_eq_synchronize_cmd_irq(dev);
 	spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
-	vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+	vector = ~dev->cmd.vars.bitmask & ((1ul << (1 << dev->cmd.vars.log_sz)) - 1);
 	if (!vector)
 		goto no_trig;
 
@@ -1753,14 +1750,14 @@ static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
 	 * to guarantee pending commands will not get freed in the meanwhile.
 	 * For that reason, it also has to be done inside the alloc_lock.
 	 */
-	for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+	for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz))
 		cmd_ent_get(cmd->ent_arr[i]);
 	vector |= MLX5_TRIGGERED_CMD_COMP;
 	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
 
 	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
 	mlx5_cmd_comp_handler(dev, vector, true);
-	for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+	for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz))
 		cmd_ent_put(cmd->ent_arr[i]);
 	return;
 
@@ -1773,22 +1770,22 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev)
 	struct mlx5_cmd *cmd = &dev->cmd;
 	int i;
 
-	for (i = 0; i < cmd->max_reg_cmds; i++) {
-		while (down_trylock(&cmd->sem)) {
+	for (i = 0; i < cmd->vars.max_reg_cmds; i++) {
+		while (down_trylock(&cmd->vars.sem)) {
 			mlx5_cmd_trigger_completions(dev);
 			cond_resched();
 		}
 	}
 
-	while (down_trylock(&cmd->pages_sem)) {
+	while (down_trylock(&cmd->vars.pages_sem)) {
 		mlx5_cmd_trigger_completions(dev);
 		cond_resched();
 	}
 
 	/* Unlock cmdif */
-	up(&cmd->pages_sem);
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		up(&cmd->sem);
+	up(&cmd->vars.pages_sem);
+	for (i = 0; i < cmd->vars.max_reg_cmds; i++)
+		up(&cmd->vars.sem);
 }
 
 static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
@@ -1858,7 +1855,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		/* atomic context may not sleep */
 		if (callback)
 			return -EINVAL;
-		down(&dev->cmd.throttle_sem);
+		down(&dev->cmd.vars.throttle_sem);
 	}
 
 	pages_queue = is_manage_pages(in);
@@ -1903,7 +1900,7 @@ out_in:
 	free_msg(dev, inb);
 out_up:
 	if (throttle_op)
-		up(&dev->cmd.throttle_sem);
+		up(&dev->cmd.vars.throttle_sem);
 	return err;
 }
 
@@ -1926,7 +1923,9 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
 	if (!err || !(strcmp(namep, "unknown command opcode")))
 		return;
 
-	stats = &dev->cmd.stats[opcode];
+	stats = xa_load(&dev->cmd.stats, opcode);
+	if (!stats)
+		return;
 	spin_lock_irq(&stats->lock);
 	stats->failed++;
 	if (err < 0)
@@ -2190,19 +2189,8 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 	int size = sizeof(struct mlx5_cmd_prot_block);
 	int align = roundup_pow_of_two(size);
 	struct mlx5_cmd *cmd = &dev->cmd;
-	u32 cmd_h, cmd_l;
-	u16 cmd_if_rev;
+	u32 cmd_l;
 	int err;
-	int i;
-
-	memset(cmd, 0, sizeof(*cmd));
-	cmd_if_rev = cmdif_rev(dev);
-	if (cmd_if_rev != CMD_IF_REV) {
-		mlx5_core_err(dev,
-			      "Driver cmdif rev(%d) differs from firmware's(%d)\n",
-			      CMD_IF_REV, cmd_if_rev);
-		return -EINVAL;
-	}
 
 	cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
 	if (!cmd->pool)
@@ -2212,62 +2200,16 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 	if (err)
 		goto err_free_pool;
 
-	cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
-	cmd->log_sz = cmd_l >> 4 & 0xf;
-	cmd->log_stride = cmd_l & 0xf;
-	if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
-		mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n",
-			      1 << cmd->log_sz);
-		err = -EINVAL;
-		goto err_free_page;
-	}
-
-	if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
-		mlx5_core_err(dev, "command queue size overflow\n");
-		err = -EINVAL;
-		goto err_free_page;
-	}
-
-	cmd->state = MLX5_CMDIF_STATE_DOWN;
-	cmd->checksum_disabled = 1;
-	cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
-	cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
-
-	cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
-	if (cmd->cmdif_rev > CMD_IF_REV) {
-		mlx5_core_err(dev, "driver does not support command interface version. driver %d, firmware %d\n",
-			      CMD_IF_REV, cmd->cmdif_rev);
-		err = -EOPNOTSUPP;
-		goto err_free_page;
-	}
-
-	spin_lock_init(&cmd->alloc_lock);
-	spin_lock_init(&cmd->token_lock);
-	for (i = 0; i < MLX5_CMD_OP_MAX; i++)
-		spin_lock_init(&cmd->stats[i].lock);
-
-	sema_init(&cmd->sem, cmd->max_reg_cmds);
-	sema_init(&cmd->pages_sem, 1);
-	sema_init(&cmd->throttle_sem, DIV_ROUND_UP(cmd->max_reg_cmds, 2));
-
-	cmd_h = (u32)((u64)(cmd->dma) >> 32);
 	cmd_l = (u32)(cmd->dma);
 	if (cmd_l & 0xfff) {
 		mlx5_core_err(dev, "invalid command queue address\n");
 		err = -ENOMEM;
-		goto err_free_page;
+		goto err_cmd_page;
 	}
+	cmd->checksum_disabled = 1;
 
-	iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
-	iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
-
-	/* Make sure firmware sees the complete address before we proceed */
-	wmb();
-
-	mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
-
-	cmd->mode = CMD_MODE_POLLING;
-	cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
+	spin_lock_init(&cmd->alloc_lock);
+	spin_lock_init(&cmd->token_lock);
 
 	create_msg_cache(dev);
 
@@ -2279,16 +2221,14 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		goto err_cache;
 	}
 
-	create_debugfs_files(dev);
+	mlx5_cmdif_debugfs_init(dev);
 
 	return 0;
 
err_cache:
 	destroy_msg_cache(dev);
-
-err_free_page:
+err_cmd_page:
 	free_cmd_page(dev, cmd);
-
err_free_pool:
 	dma_pool_destroy(cmd->pool);
 	return err;
@@ -2298,13 +2238,78 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 
-	clean_debug_files(dev);
+	mlx5_cmdif_debugfs_cleanup(dev);
 	destroy_workqueue(cmd->wq);
 	destroy_msg_cache(dev);
 	free_cmd_page(dev, cmd);
 	dma_pool_destroy(cmd->pool);
 }
 
+int mlx5_cmd_enable(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	u32 cmd_h, cmd_l;
+
+	memset(&cmd->vars, 0, sizeof(cmd->vars));
+	cmd->vars.cmdif_rev = cmdif_rev(dev);
+	if (cmd->vars.cmdif_rev != CMD_IF_REV) {
+		mlx5_core_err(dev,
+			      "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+			      CMD_IF_REV, cmd->vars.cmdif_rev);
+		return -EINVAL;
+	}
+
+	cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
+	cmd->vars.log_sz = cmd_l >> 4 & 0xf;
+	cmd->vars.log_stride = cmd_l & 0xf;
+	if (1 << cmd->vars.log_sz > MLX5_MAX_COMMANDS) {
+		mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n",
+			      1 << cmd->vars.log_sz);
+		return -EINVAL;
+	}
+
+	if (cmd->vars.log_sz + cmd->vars.log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
+		mlx5_core_err(dev, "command queue size overflow\n");
+		return -EINVAL;
+	}
+
+	cmd->state = MLX5_CMDIF_STATE_DOWN;
+	cmd->vars.max_reg_cmds = (1 << cmd->vars.log_sz) - 1;
+	cmd->vars.bitmask = (1UL << cmd->vars.max_reg_cmds) - 1;
+
+	sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds);
+	sema_init(&cmd->vars.pages_sem, 1);
+	sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2));
+
+	cmd_h = (u32)((u64)(cmd->dma) >> 32);
+	cmd_l = (u32)(cmd->dma);
+	if (WARN_ON(cmd_l & 0xfff))
+		return -EINVAL;
+
+	iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
+	iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
+
+	/* Make sure firmware sees the complete address before we proceed */
+	wmb();
+
+	mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
+
+	cmd->mode = CMD_MODE_POLLING;
+	cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
+
+	create_debugfs_files(dev);
+
+	return 0;
+}
+
+void mlx5_cmd_disable(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+
+	clean_debug_files(dev);
+	flush_workqueue(cmd->wq);
+}
+
 void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
 			enum mlx5_cmdif_state cmdif_state)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 2138f28a2931..09652dc89115 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -176,8 +176,8 @@ static ssize_t slots_read(struct file *filp, char __user *buf, size_t count,
 	int ret;
 
 	cmd = filp->private_data;
-	weight = bitmap_weight(&cmd->bitmask, cmd->max_reg_cmds);
-	field = cmd->max_reg_cmds - weight;
+	weight = bitmap_weight(&cmd->vars.bitmask, cmd->vars.max_reg_cmds);
+	field = cmd->vars.max_reg_cmds - weight;
 	ret = snprintf(tbuf, sizeof(tbuf), "%d\n", field);
 	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
 }
@@ -188,6 +188,24 @@ static const struct file_operations slots_fops = {
 	.read	= slots_read,
 };
 
+static struct mlx5_cmd_stats *
+mlx5_cmdif_alloc_stats(struct xarray *stats_xa, int opcode)
+{
+	struct mlx5_cmd_stats *stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+	int err;
+
+	if (!stats)
+		return NULL;
+
+	err = xa_insert(stats_xa, opcode, stats, GFP_KERNEL);
+	if (err) {
+		kfree(stats);
+		return NULL;
+	}
+	spin_lock_init(&stats->lock);
+	return stats;
+}
+
 void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_stats *stats;
@@ -200,10 +218,14 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 
 	debugfs_create_file("slots_inuse", 0400, *cmd, &dev->cmd, &slots_fops);
 
+	xa_init(&dev->cmd.stats);
+
 	for (i = 0; i < MLX5_CMD_OP_MAX; i++) {
-		stats = &dev->cmd.stats[i];
 		namep = mlx5_command_str(i);
 		if (strcmp(namep, "unknown command opcode")) {
+			stats = mlx5_cmdif_alloc_stats(&dev->cmd.stats, i);
+			if (!stats)
+				continue;
 			stats->root = debugfs_create_dir(namep, *cmd);
 
 			debugfs_create_file("average", 0400, stats->root, stats,
@@ -224,7 +246,13 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
+	struct mlx5_cmd_stats *stats;
+	unsigned long i;
+
 	debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs);
+	xa_for_each(&dev->cmd.stats, i, stats)
+		kfree(stats);
+	xa_destroy(&dev->cmd.stats);
 }
 
 void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
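The cmd.c and debugfs.c hunks above replace the fixed stats[MLX5_CMD_OP_MAX] array with an xarray populated only for known opcodes, so hot-path lookups can fail gracefully for unknown commands. A self-contained sketch of the same pattern, with hypothetical names (per-entry locking is omitted for brevity; the real code takes stats->lock around updates):

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/xarray.h>

struct my_stats {
	u64 sum;
	u64 n;
};

static DEFINE_XARRAY(stats_xa);

static struct my_stats *stats_alloc(unsigned long opcode)
{
	struct my_stats *s = kzalloc(sizeof(*s), GFP_KERNEL);

	/* xa_insert() fails with -EBUSY if the opcode is already present */
	if (s && xa_insert(&stats_xa, opcode, s, GFP_KERNEL)) {
		kfree(s);
		return NULL;
	}
	return s;
}

static void stats_record(unsigned long opcode, u64 delta)
{
	struct my_stats *s = xa_load(&stats_xa, opcode);	/* lockless lookup */

	if (!s)		/* opcode was never registered; drop the sample */
		return;
	s->sum += delta;
	s->n++;
}

static void stats_cleanup(void)
{
	struct my_stats *s;
	unsigned long i;

	xa_for_each(&stats_xa, i, s)	/* free every entry, then the tree */
		kfree(s);
	xa_destroy(&stats_xa);
}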
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index edb06fb9bbc5..7909f378dc93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -36,6 +36,7 @@
 #include <linux/mlx5/vport.h>
 #include "mlx5_core.h"
 #include "devlink.h"
+#include "lag/lag.h"
 
 /* intf dev list mutex */
 static DEFINE_MUTEX(mlx5_intf_mutex);
@@ -587,10 +588,7 @@ static int next_phys_dev_lag(struct device *dev, const void *data)
 	if (!mdev)
 		return 0;
 
-	if (!MLX5_CAP_GEN(mdev, vport_group_manager) ||
-	    !MLX5_CAP_GEN(mdev, lag_master) ||
-	    (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS ||
-	     MLX5_CAP_GEN(mdev, num_lag_ports) <= 1))
+	if (!mlx5_lag_is_supported(mdev))
 		return 0;
 
 	return _next_phys_dev(mdev, data);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 3d82ec890666..af8460bb257b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -212,6 +212,9 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
 		/* On fw_activate action, also driver is reloaded and reinit performed */
 		*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
 		ret = mlx5_load_one_devl_locked(dev, true);
+		if (ret)
+			return ret;
+		ret = mlx5_fw_reset_verify_fw_complete(dev, extack);
 		break;
 	default:
 		/* Unsupported action should not get to this function */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index defba5bd91d9..961f75da6227 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -6,6 +6,14 @@
 
 #include <net/devlink.h>
 
+enum mlx5_devlink_resource_id {
+	MLX5_DL_RES_MAX_LOCAL_SFS = 1,
+	MLX5_DL_RES_MAX_EXTERNAL_SFS,
+
+	__MLX5_ID_RES_MAX,
+	MLX5_ID_RES_MAX = __MLX5_ID_RES_MAX - 1,
+};
+
 enum mlx5_devlink_param_id {
 	MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
 	MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
index b0128336ff01..e869c65d8e90 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
 
 #include "reporter_vnic.h"
+#include "en_stats.h"
 #include "devlink.h"
 
 #define VNIC_ENV_GET64(vnic_env_stats, c) \
@@ -36,55 +37,72 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
 	if (err)
 		return err;
 
-	err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
-					VNIC_ENV_GET64(&vnic, total_error_queues));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
-					VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
-					VNIC_ENV_GET64(&vnic, comp_eq_overrun));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
-					VNIC_ENV_GET64(&vnic, async_eq_overrun));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
-					VNIC_ENV_GET64(&vnic, cq_overrun));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
-					VNIC_ENV_GET64(&vnic, invalid_command));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
-					VNIC_ENV_GET64(&vnic, quota_exceeded_command));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
-					VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
-					VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail));
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
-					VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
-	if (err)
-		return err;
+	if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) {
+		err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues",
+						VNIC_ENV_GET(&vnic, total_error_queues));
+		if (err)
+			return err;
+
+		err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow",
+						VNIC_ENV_GET(&vnic,
+							     send_queue_priority_update_flow));
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, eq_overrun_count)) {
+		err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun",
+						VNIC_ENV_GET(&vnic, comp_eq_overrun));
+		if (err)
+			return err;
+
+		err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun",
+						VNIC_ENV_GET(&vnic, async_eq_overrun));
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) {
+		err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun",
+						VNIC_ENV_GET(&vnic, cq_overrun));
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, invalid_command_count)) {
+		err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command",
+						VNIC_ENV_GET(&vnic, invalid_command));
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, quota_exceeded_count)) {
+		err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command",
+						VNIC_ENV_GET(&vnic, quota_exceeded_command));
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) {
+		err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+						VNIC_ENV_GET64(&vnic,
+							       nic_receive_steering_discard));
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) {
+		err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
+						VNIC_ENV_GET64(&vnic,
+							       generated_pkt_steering_fail));
+		if (err)
+			return err;
+
+		err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
+						VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
+		if (err)
+			return err;
+	}
 
 	err = devlink_fmsg_obj_nest_end(fmsg);
 	if (err)
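The reporter_vnic.c rewrite above stops emitting counters the device does not implement: each devlink fmsg pair is now guarded by the matching capability bit, and the queue counters shrink from u64 to u32 pairs. A reduced sketch of that guard structure, using an assumed has_cap() predicate and my_dev type in place of the real MLX5_CAP_GEN() checks; the fmsg helpers return int here, as they do in the hunks above:

#include <net/devlink.h>

enum my_cap { QUEUE_COUNTERS, STEERING_COUNTERS };

struct my_dev {				/* illustrative device state */
	struct {
		u32 total_error_queues;
		u64 rx_steering_discard;
	} stats;
};

static bool has_cap(const struct my_dev *dev, enum my_cap cap);	/* assumed */

static int report_counters(struct devlink_fmsg *fmsg, const struct my_dev *dev)
{
	int err;

	/* emit a counter pair only when the device advertises it */
	if (has_cap(dev, QUEUE_COUNTERS)) {
		err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues",
						dev->stats.total_error_queues);
		if (err)
			return err;
	}

	if (has_cap(dev, STEERING_COUNTERS)) {
		err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
						dev->stats.rx_steering_discard);
		if (err)
			return err;
	}

	return 0;
}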
b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b1807bfb815f..86f2690c5e01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -193,7 +193,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? MLX5E_MIN_NUM_CHANNELS : - min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); + min_t(int, mlx5_comp_vectors_max(mdev), MLX5E_MAX_NUM_CHANNELS); } /* The maximum WQE size can be retrieved by max_wqe_sz_sq in @@ -917,7 +917,7 @@ struct mlx5e_priv { const struct mlx5e_profile *profile; void *ppriv; -#ifdef CONFIG_MLX5_EN_MACSEC +#ifdef CONFIG_MLX5_MACSEC struct mlx5e_macsec *macsec; #endif #ifdef CONFIG_MLX5_EN_IPSEC @@ -1167,9 +1167,6 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc); int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc); -int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, - u32 *rule_locs); -int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 0107e4e73bb0..415840c3ef84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -18,6 +18,7 @@ void mlx5e_reporter_tx_create(struct mlx5e_priv *priv); void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq); int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); +void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq); int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 5ce28ff7685f..e097f336e1c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -6,6 +6,7 @@ #include "en/port.h" #include "en_accel/en_accel.h" #include "en_accel/ipsec.h" +#include <net/page_pool/types.h> #include <net/xdp_sock_drv.h> static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index b0b429a0321e..bb11e644d24f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -2,9 +2,12 @@ // Copyright (c) 2020 Mellanox Technologies #include "en/ptp.h" +#include "en/health.h" #include "en/txrx.h" #include "en/params.h" #include "en/fs_tt_redirect.h" +#include <linux/list.h> +#include <linux/spinlock.h> struct mlx5e_ptp_fs { struct mlx5_flow_handle *l2_rule; @@ -19,6 +22,48 @@ struct mlx5e_ptp_params { struct mlx5e_rq_param rq_param; }; +struct mlx5e_ptp_port_ts_cqe_tracker { + u8 metadata_id; + bool inuse : 1; + struct list_head entry; +}; + +struct mlx5e_ptp_port_ts_cqe_list { + struct mlx5e_ptp_port_ts_cqe_tracker *nodes; + struct list_head tracker_list_head; + /* Sync list operations in xmit and napi_poll contexts */ + spinlock_t tracker_list_lock; 
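The two ptp.c structs being introduced here pair a preallocated nodes[] array, indexed directly by metadata id, with an intrusive list of the ids still awaiting a port timestamp CQE; the helpers just below keep both in sync. A standalone sketch of that O(1) add/remove shape, with prev/next indices standing in for the kernel's struct list_head and locking elided (the driver serializes with tracker_list_lock):

#include <assert.h>
#include <stdbool.h>

#define POOL_SZ 16

struct tracker {
	bool inuse;
	int prev, next;		/* in-flight list links, -1 = none */
};

static struct tracker nodes[POOL_SZ];
static int head = -1, tail = -1;

static void track_add(int id)
{
	assert(!nodes[id].inuse);
	nodes[id] = (struct tracker){ .inuse = true, .prev = tail, .next = -1 };
	if (tail >= 0)
		nodes[tail].next = id;
	else
		head = id;
	tail = id;
}

static void track_del(int id)
{
	assert(nodes[id].inuse);
	nodes[id].inuse = false;
	if (nodes[id].prev >= 0)
		nodes[nodes[id].prev].next = nodes[id].next;
	else
		head = nodes[id].next;
	if (nodes[id].next >= 0)
		nodes[nodes[id].next].prev = nodes[id].prev;
	else
		tail = nodes[id].prev;
}

int main(void)
{
	track_add(3);
	track_add(7);
	track_del(3);		/* completions may arrive out of order */
	assert(head == 7 && tail == 7);
	track_del(7);
	assert(head == -1 && tail == -1);
	return 0;
}

The FIFO order the list preserves is what mlx5e_ptpsq_mark_ts_cqes_undelivered() relies on: the scan can stop at the first entry whose DMA timestamp is still within the timeout.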
+}; + +static inline void +mlx5e_ptp_port_ts_cqe_list_add(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metadata) +{ + struct mlx5e_ptp_port_ts_cqe_tracker *tracker = &list->nodes[metadata]; + + WARN_ON_ONCE(tracker->inuse); + tracker->inuse = true; + spin_lock(&list->tracker_list_lock); + list_add_tail(&tracker->entry, &list->tracker_list_head); + spin_unlock(&list->tracker_list_lock); +} + +static void +mlx5e_ptp_port_ts_cqe_list_remove(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metadata) +{ + struct mlx5e_ptp_port_ts_cqe_tracker *tracker = &list->nodes[metadata]; + + WARN_ON_ONCE(!tracker->inuse); + tracker->inuse = false; + spin_lock(&list->tracker_list_lock); + list_del(&tracker->entry); + spin_unlock(&list->tracker_list_lock); +} + +void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata) +{ + mlx5e_ptp_port_ts_cqe_list_add(ptpsq->ts_cqe_pending_list, metadata); +} + struct mlx5e_skb_cb_hwtstamp { ktime_t cqe_hwtstamp; ktime_t port_hwtstamp; @@ -79,75 +124,97 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type, memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp)); } -#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) - -static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_ci, u16 skb_id) +static struct sk_buff * +mlx5e_ptp_metadata_map_lookup(struct mlx5e_ptp_metadata_map *map, u16 metadata) { - return (ptpsq->ts_cqe_ctr_mask && (skb_ci != skb_id)); + return map->data[metadata]; } -static bool mlx5e_ptp_ts_cqe_ooo(struct mlx5e_ptpsq *ptpsq, u16 skb_id) +static struct sk_buff * +mlx5e_ptp_metadata_map_remove(struct mlx5e_ptp_metadata_map *map, u16 metadata) { - u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); - u16 skb_pi = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc); + struct sk_buff *skb; - if (PTP_WQE_CTR2IDX(skb_id - skb_ci) >= PTP_WQE_CTR2IDX(skb_pi - skb_ci)) - return true; + skb = map->data[metadata]; + map->data[metadata] = NULL; - return false; + return skb; } -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_ci, - u16 skb_id, int budget) +static bool mlx5e_ptp_metadata_map_unhealthy(struct mlx5e_ptp_metadata_map *map) { - struct skb_shared_hwtstamps hwts = {}; - struct sk_buff *skb; + /* Considered beginning unhealthy state if size * 15 / 2^4 cannot be reclaimed. 
*/ + return map->undelivered_counter > (map->capacity >> 4) * 15; +} - ptpsq->cq_stats->resync_event++; +static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, + ktime_t port_tstamp) +{ + struct mlx5e_ptp_port_ts_cqe_list *cqe_list = ptpsq->ts_cqe_pending_list; + ktime_t timeout = ns_to_ktime(MLX5E_PTP_TS_CQE_UNDELIVERED_TIMEOUT); + struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; + struct mlx5e_ptp_port_ts_cqe_tracker *pos, *n; + + spin_lock(&cqe_list->tracker_list_lock); + list_for_each_entry_safe(pos, n, &cqe_list->tracker_list_head, entry) { + struct sk_buff *skb = + mlx5e_ptp_metadata_map_lookup(metadata_map, pos->metadata_id); + ktime_t dma_tstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp; + + if (!dma_tstamp || + ktime_after(ktime_add(dma_tstamp, timeout), port_tstamp)) + break; - while (skb_ci != skb_id) { - skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); - hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp; - skb_tstamp_tx(skb, &hwts); - ptpsq->cq_stats->resync_cqe++; - napi_consume_skb(skb, budget); - skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + metadata_map->undelivered_counter++; + WARN_ON_ONCE(!pos->inuse); + pos->inuse = false; + list_del(&pos->entry); } + spin_unlock(&cqe_list->tracker_list_lock); } +#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) + static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, struct mlx5_cqe64 *cqe, int budget) { - u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter)); - u16 skb_ci = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc); + struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list; + u8 metadata_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter)); + bool is_err_cqe = !!MLX5E_RX_ERR_CQE(cqe); struct mlx5e_txqsq *sq = &ptpsq->txqsq; struct sk_buff *skb; ktime_t hwtstamp; - if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); - ptpsq->cq_stats->err_cqe++; - goto out; + if (likely(pending_cqe_list->nodes[metadata_id].inuse)) { + mlx5e_ptp_port_ts_cqe_list_remove(pending_cqe_list, metadata_id); + } else { + /* Reclaim space in the unlikely event CQE was delivered after + * marking it late. 
+ */ + ptpsq->metadata_map.undelivered_counter--; + ptpsq->cq_stats->late_cqe++; } - if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_ci, skb_id)) { - if (mlx5e_ptp_ts_cqe_ooo(ptpsq, skb_id)) { - /* already handled by a previous resync */ - ptpsq->cq_stats->ooo_cqe_drop++; - return; - } - mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_ci, skb_id, budget); + skb = mlx5e_ptp_metadata_map_remove(&ptpsq->metadata_map, metadata_id); + + if (unlikely(is_err_cqe)) { + ptpsq->cq_stats->err_cqe++; + goto out; } - skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo); hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe)); mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP, hwtstamp, ptpsq->cq_stats); ptpsq->cq_stats->cqe++; + mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp); out: napi_consume_skb(skb, budget); + mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id); + if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) && + !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work); } static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) @@ -291,36 +358,86 @@ static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa) { - int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq); - struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev; + struct mlx5e_ptp_metadata_fifo *metadata_freelist = &ptpsq->metadata_freelist; + struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; + struct mlx5e_ptp_port_ts_cqe_list *cqe_list; + int db_sz; + int md; - ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)), - GFP_KERNEL, numa); - if (!ptpsq->skb_fifo.fifo) + cqe_list = kvzalloc_node(sizeof(*ptpsq->ts_cqe_pending_list), GFP_KERNEL, numa); + if (!cqe_list) return -ENOMEM; + ptpsq->ts_cqe_pending_list = cqe_list; + + db_sz = min_t(u32, mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq), + 1 << MLX5_CAP_GEN_2(ptpsq->txqsq.mdev, + ts_cqe_metadata_size2wqe_counter)); + ptpsq->ts_cqe_ctr_mask = db_sz - 1; + + cqe_list->nodes = kvzalloc_node(array_size(db_sz, sizeof(*cqe_list->nodes)), + GFP_KERNEL, numa); + if (!cqe_list->nodes) + goto free_cqe_list; + INIT_LIST_HEAD(&cqe_list->tracker_list_head); + spin_lock_init(&cqe_list->tracker_list_lock); + + metadata_freelist->data = + kvzalloc_node(array_size(db_sz, sizeof(*metadata_freelist->data)), + GFP_KERNEL, numa); + if (!metadata_freelist->data) + goto free_cqe_list_nodes; + metadata_freelist->mask = ptpsq->ts_cqe_ctr_mask; + + for (md = 0; md < db_sz; ++md) { + cqe_list->nodes[md].metadata_id = md; + metadata_freelist->data[md] = md; + } + metadata_freelist->pc = db_sz; + + metadata_map->data = + kvzalloc_node(array_size(db_sz, sizeof(*metadata_map->data)), + GFP_KERNEL, numa); + if (!metadata_map->data) + goto free_metadata_freelist; + metadata_map->capacity = db_sz; - ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc; - ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc; - ptpsq->skb_fifo.mask = wq_sz - 1; - if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - ptpsq->ts_cqe_ctr_mask = - (1 << MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1; return 0; + +free_metadata_freelist: + kvfree(metadata_freelist->data); +free_cqe_list_nodes: + kvfree(cqe_list->nodes); +free_cqe_list: + kvfree(cqe_list); + return -ENOMEM; } -static void mlx5e_ptp_drain_skb_fifo(struct mlx5e_skb_fifo *skb_fifo) +static void mlx5e_ptp_drain_metadata_map(struct 
mlx5e_ptp_metadata_map *map) { - while (*skb_fifo->pc != *skb_fifo->cc) { - struct sk_buff *skb = mlx5e_skb_fifo_pop(skb_fifo); + int idx; + + for (idx = 0; idx < map->capacity; ++idx) { + struct sk_buff *skb = map->data[idx]; dev_kfree_skb_any(skb); } } -static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo) +static void mlx5e_ptp_free_traffic_db(struct mlx5e_ptpsq *ptpsq) { - mlx5e_ptp_drain_skb_fifo(skb_fifo); - kvfree(skb_fifo->fifo); + mlx5e_ptp_drain_metadata_map(&ptpsq->metadata_map); + kvfree(ptpsq->metadata_map.data); + kvfree(ptpsq->metadata_freelist.data); + kvfree(ptpsq->ts_cqe_pending_list->nodes); + kvfree(ptpsq->ts_cqe_pending_list); +} + +static void mlx5e_ptpsq_unhealthy_work(struct work_struct *work) +{ + struct mlx5e_ptpsq *ptpsq = + container_of(work, struct mlx5e_ptpsq, report_unhealthy_work); + + mlx5e_reporter_tx_ptpsq_unhealthy(ptpsq); } static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, @@ -348,11 +465,12 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, if (err) goto err_free_txqsq; - err = mlx5e_ptp_alloc_traffic_db(ptpsq, - dev_to_node(mlx5_core_dma_dev(c->mdev))); + err = mlx5e_ptp_alloc_traffic_db(ptpsq, dev_to_node(mlx5_core_dma_dev(c->mdev))); if (err) goto err_free_txqsq; + INIT_WORK(&ptpsq->report_unhealthy_work, mlx5e_ptpsq_unhealthy_work); + return 0; err_free_txqsq: @@ -366,7 +484,9 @@ static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq) struct mlx5e_txqsq *sq = &ptpsq->txqsq; struct mlx5_core_dev *mdev = sq->mdev; - mlx5e_ptp_free_traffic_db(&ptpsq->skb_fifo); + if (current_work() != &ptpsq->report_unhealthy_work) + cancel_work_sync(&ptpsq->report_unhealthy_work); + mlx5e_ptp_free_traffic_db(ptpsq); cancel_work_sync(&sq->recover_work); mlx5e_ptp_destroy_sq(mdev, sq->sqn); mlx5e_free_txqsq_descs(sq); @@ -534,7 +654,10 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, /* SQ */ if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { - params->log_sq_size = orig->log_sq_size; + params->log_sq_size = + min(MLX5_CAP_GEN_2(c->mdev, ts_cqe_metadata_size2wqe_counter), + MLX5E_PTP_MAX_LOG_SQ_SIZE); + params->log_sq_size = min(params->log_sq_size, orig->log_sq_size); mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); } /* RQ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index cc7efde88ac3..7b700d0f956a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -7,18 +7,38 @@ #include "en.h" #include "en_stats.h" #include "en/txrx.h" +#include <linux/ktime.h> #include <linux/ptp_classify.h> +#include <linux/time64.h> +#include <linux/workqueue.h> #define MLX5E_PTP_CHANNEL_IX 0 +#define MLX5E_PTP_MAX_LOG_SQ_SIZE (8U) +#define MLX5E_PTP_TS_CQE_UNDELIVERED_TIMEOUT (1 * NSEC_PER_SEC) + +struct mlx5e_ptp_metadata_fifo { + u8 cc; + u8 pc; + u8 mask; + u8 *data; +}; + +struct mlx5e_ptp_metadata_map { + u16 undelivered_counter; + u16 capacity; + struct sk_buff **data; +}; struct mlx5e_ptpsq { struct mlx5e_txqsq txqsq; struct mlx5e_cq ts_cq; - u16 skb_fifo_cc; - u16 skb_fifo_pc; - struct mlx5e_skb_fifo skb_fifo; struct mlx5e_ptp_cq_stats *cq_stats; u16 ts_cqe_ctr_mask; + + struct work_struct report_unhealthy_work; + struct mlx5e_ptp_port_ts_cqe_list *ts_cqe_pending_list; + struct mlx5e_ptp_metadata_fifo metadata_freelist; + struct mlx5e_ptp_metadata_map metadata_map; }; enum { @@ -69,12 +89,35 @@ static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) fk.ports.dst == 
htons(PTP_EV_PORT)); } -static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq) +static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *fifo, u8 metadata) { - if (!sq->ptpsq) - return true; + fifo->data[fifo->mask & fifo->pc++] = metadata; +} + +static inline u8 +mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo) +{ + return fifo->data[fifo->mask & fifo->cc++]; +} - return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo); +static inline void +mlx5e_ptp_metadata_map_put(struct mlx5e_ptp_metadata_map *map, + struct sk_buff *skb, u8 metadata) +{ + WARN_ON_ONCE(map->data[metadata]); + map->data[metadata] = skb; +} + +static inline bool mlx5e_ptpsq_metadata_freelist_empty(struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_ptp_metadata_fifo *freelist; + + if (likely(!ptpsq)) + return false; + + freelist = &ptpsq->metadata_freelist; + + return freelist->pc == freelist->cc; } int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, @@ -89,6 +132,8 @@ void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs, const struct mlx5e_profile *profile); int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set); +void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata); + enum { MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0), MLX5E_SKB_CB_PORT_HWTSTAMP = BIT(1), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 1874c2f0587f..244bc15a42ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -379,9 +379,9 @@ int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_ if (!htb && htb_qopt->command != TC_HTB_CREATE) return -EINVAL; - if (htb_qopt->prio) { + if (htb_qopt->prio || htb_qopt->quantum) { NL_SET_ERR_MSG_MOD(htb_qopt->extack, - "prio parameter is not supported by device with HTB offload enabled."); + "prio and quantum parameters are not supported by device with HTB offload enabled."); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 560800246573..0fef853eab62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -77,6 +77,10 @@ mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_es return NULL; priv = netdev_priv(dev); + + if (!priv->mdev->priv.eswitch->br_offloads) + return NULL; + rpriv = priv->ppriv; *vport_num = rpriv->rep->vport; *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index b5c773ffc763..b12fe3c5a258 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -715,9 +715,20 @@ void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, uplink_priv = &uplink_rpriv->uplink_priv; ct_priv = uplink_priv->ct_priv; - if (!mlx5_ipsec_is_rx_flow(cqe) && - !mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv, zone_restore_id, tunnel_id, - &tc_priv)) +#ifdef CONFIG_MLX5_EN_IPSEC + if (!(tunnel_id >> ESW_TUN_OPTS_BITS)) { + u32 mapped_id; + u32 metadata; + + mapped_id = tunnel_id & ESW_IPSEC_RX_MAPPED_ID_MASK; + if (mapped_id && + !mlx5_esw_ipsec_rx_make_metadata(priv, mapped_id, &metadata)) + mlx5e_ipsec_offload_handle_rx_skb(priv->netdev, skb, metadata); + } 
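Taken together, the ptp.h helpers above define the whole metadata lifecycle: pop a free id at xmit time, park the skb in the id-indexed map, and recycle the id once its CQE is consumed. A userspace model of that flow, assuming a plain pointer as the skb stand-in; the u8 pc/cc counters with a power-of-two mask make wraparound handling free:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define DB_SZ 8			/* must be a power of two */

struct fifo { uint8_t cc, pc, mask, data[DB_SZ]; };

static void fifo_push(struct fifo *f, uint8_t md) { f->data[f->mask & f->pc++] = md; }
static uint8_t fifo_pop(struct fifo *f) { return f->data[f->mask & f->cc++]; }
static int fifo_empty(const struct fifo *f) { return f->pc == f->cc; }

int main(void)
{
	struct fifo freelist = { .mask = DB_SZ - 1 };
	void *map[DB_SZ] = { NULL };	/* metadata id -> skb */
	char skb;			/* stand-in packet */
	uint8_t md;

	for (md = 0; md < DB_SZ; md++)	/* mirror mlx5e_ptp_alloc_traffic_db() */
		fifo_push(&freelist, md);

	/* xmit path: reserve an id and remember the skb under it */
	assert(!fifo_empty(&freelist));
	md = fifo_pop(&freelist);
	map[md] = &skb;

	/* ts CQE path: the id carried in the CQE leads straight to the skb */
	assert(map[md] == &skb);
	map[md] = NULL;
	fifo_push(&freelist, md);	/* id is reusable again */
	return 0;
}

This is the design point of the series: the CQE no longer has to arrive in skb FIFO order, because the metadata id, not a queue position, names the packet.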
+#endif + + if (!mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv, + zone_restore_id, tunnel_id, &tc_priv)) goto free_skb; forward: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b35ff289af49..ff8242f67c54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -164,6 +164,43 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } +static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) +{ + struct mlx5e_ptpsq *ptpsq = ctx; + struct mlx5e_channels *chs; + struct net_device *netdev; + struct mlx5e_priv *priv; + int carrier_ok; + int err; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &ptpsq->txqsq.state)) + return 0; + + priv = ptpsq->txqsq.priv; + + mutex_lock(&priv->state_lock); + chs = &priv->channels; + netdev = priv->netdev; + + carrier_ok = netif_carrier_ok(netdev); + netif_carrier_off(netdev); + + mlx5e_deactivate_priv_channels(priv); + + mlx5e_ptp_close(chs->ptp); + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); + + mlx5e_activate_priv_channels(priv); + + /* return carrier back if needed */ + if (carrier_ok) + netif_carrier_on(netdev); + + mutex_unlock(&priv->state_lock); + + return err; +} + /* state lock cannot be grabbed within this function. * It can cause a dead lock or a read-after-free. */ @@ -516,6 +553,15 @@ static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlin return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); } +static int mlx5e_tx_reporter_ptpsq_unhealthy_dump(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_ptpsq *ptpsq = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, &ptpsq->txqsq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -621,6 +667,25 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) return to_ctx.status; } +void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_ptp_metadata_map *map = &ptpsq->metadata_map; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_txqsq *txqsq = &ptpsq->txqsq; + struct mlx5e_cq *ts_cq = &ptpsq->ts_cq; + struct mlx5e_priv *priv = txqsq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = ptpsq; + err_ctx.recover = mlx5e_tx_reporter_ptpsq_unhealthy_recover; + err_ctx.dump = mlx5e_tx_reporter_ptpsq_unhealthy_dump; + snprintf(err_str, sizeof(err_str), + "Unhealthy TX port TS queue: %d, SQ: 0x%x, CQ: 0x%x, Undelivered CQEs: %u Map Capacity: %u", + txqsq->ch_ix, txqsq->sqn, ts_cq->mcq.cqn, map->undelivered_counter, map->capacity); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { .name = "tx", .recover = mlx5e_tx_reporter_recover, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index e1095bc36543..56e6b8c7501f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -218,17 +218,32 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch); } -u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, 
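mlx5e_tx_reporter_ptpsq_unhealthy_recover() above follows the usual reporter shape: do nothing unless the SQ was actually marked RECOVERING, take the carrier down around the channel rebuild, and restore it only if it was up beforehand. A compact model of that control flow, with all functions as stand-ins rather than driver API:

#include <stdbool.h>
#include <stdio.h>

static bool recovering = true, carrier = true;

static void carrier_off(void) { carrier = false; }
static void carrier_on(void)  { carrier = true; }
static int  rebuild_ptp(void) { recovering = false; return 0; }

static int ptpsq_recover(void)
{
	bool was_up;
	int err;

	if (!recovering)	/* mirror the MLX5E_SQ_STATE_RECOVERING test */
		return 0;

	was_up = carrier;
	carrier_off();		/* stop traffic while channels are rebuilt */
	err = rebuild_ptp();	/* model of mlx5e_ptp_close() + mlx5e_ptp_open() */
	if (was_up)
		carrier_on();	/* put the link back as we found it */
	return err;
}

int main(void)
{
	printf("recover: %d, carrier restored: %d\n", ptpsq_recover(), carrier);
	return 0;
}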
+ enum mlx5_traffic_types tt) { - struct mlx5e_rss *rss = res->rss[0]; + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; return mlx5e_rss_get_hash_fields(rss, tt); } -int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, - u8 rx_hash_fields) +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, + enum mlx5_traffic_types tt, u8 rx_hash_fields) { - struct mlx5e_rss *rss = res->rss[0]; + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 5d5f64fab60f..580fe8bc3cd2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -48,9 +48,10 @@ int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, const u32 *indir, const u8 *key, const u8 *hfunc); -u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); -int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, - u8 rx_hash_fields); +int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, + enum mlx5_traffic_types tt); +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, + enum mlx5_traffic_types tt, u8 rx_hash_fields); int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, struct mlx5e_packet_merge_param *pkt_merge_param); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 2b80fe73549d..8c531f4ec912 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -221,16 +221,21 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) } static inline bool -mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys) +mlx5_tc_ct_valid_used_dissector_keys(const u64 used_keys) { -#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name) - const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META); - const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); - const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP); - const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS); - const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS); - const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); - const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); +#define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name) + const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | + DISS_BIT(META); + const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); + const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == 
ipv6_tcp || used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); @@ -247,7 +252,7 @@ mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *f struct flow_match_tcp tcp; if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { - ct_dbg("rule uses unexpected dissectors (0x%08x)", + ct_dbg("rule uses unexpected dissectors (0x%016llx)", flow_rule->match.dissector->used_keys); return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f0c3464f037f..1730f6a716ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1030,9 +1030,6 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, int out_index; int err = 0; - if (!mlx5e_is_eswitch_flow(flow)) - return 0; - parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; @@ -1464,10 +1461,12 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; - if (flow_flag_test(flow, SLOW)) + if (flow_flag_test(flow, SLOW)) { mlx5e_tc_unoffload_from_slow_path(esw, flow); - else + } else { mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); + } mlx5e_tc_detach_mod_hdr(priv, flow, attr); attr->modify_hdr = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 201ac7dd338f..5620d9f97518 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies */ -#include <net/page_pool.h> #include "en/txrx.h" #include "en/params.h" #include "en/trap.h" @@ -128,7 +127,7 @@ static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0)); + int cpu = mlx5_comp_vector_get_cpu(priv->mdev, 0); struct net_device *netdev = priv->netdev; struct mlx5e_trap *t; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 40589cebb773..12f56d0db0af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -35,6 +35,7 @@ #include "en/xdp.h" #include "en/params.h" #include <linux/bitfield.h> +#include <net/page_pool/helpers.h> int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 9e8e6184f9e4..ecfe93a479da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -84,6 +84,8 @@ enum mlx5e_xdp_xmit_mode { * MLX5E_XDP_XMIT_MODE_XSK: * none. 
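The ct_fs_smfs.c change above exists because the flow dissector key space outgrew 32 bits; building the allow-list with BIT() instead of BIT_ULL() would silently truncate the high keys. A self-contained version of the exact-match check, with the enum values invented purely for the demo:

#include <assert.h>
#include <stdint.h>

#define BIT_ULL(n) (1ULL << (n))

enum { KEY_BASIC = 0, KEY_CONTROL = 1, KEY_IPV4 = 2, KEY_PORTS = 40 };

static int valid_used_keys(uint64_t used_keys)
{
	const uint64_t basic = BIT_ULL(KEY_BASIC) | BIT_ULL(KEY_CONTROL);
	const uint64_t ipv4_udp = basic | BIT_ULL(KEY_IPV4) | BIT_ULL(KEY_PORTS);

	/* exact match: any extra dissector key rejects the rule */
	return used_keys == basic || used_keys == ipv4_udp;
}

int main(void)
{
	/* bit 40 would vanish in a 32-bit mask: (uint32_t)BIT_ULL(40) == 0 */
	assert(valid_used_keys(BIT_ULL(KEY_BASIC) | BIT_ULL(KEY_CONTROL)));
	assert(!valid_used_keys(BIT_ULL(KEY_BASIC)));
	return 0;
}

The matching debug-format change (0x%08x to 0x%016llx) keeps the printed mask in step with the widened type.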
*/ +#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4 + union mlx5e_xdp_info { enum mlx5e_xdp_xmit_mode mode; union { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index d97e6df66f45..b8dd74453655 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -323,8 +323,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, net_prefetch(mxbuf->xdp.data); prog = rcu_dereference(rq->xdp_prog); - if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); return NULL; /* page/packet was consumed by XDP */ + } /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse * will be handled by mlx5e_free_rx_wqe. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index bac4717548c6..caa34b9c161e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -138,7 +138,7 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, } #endif -#ifdef CONFIG_MLX5_EN_MACSEC +#ifdef CONFIG_MLX5_MACSEC if (unlikely(mlx5e_macsec_skb_is_offload(skb))) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -173,7 +173,7 @@ static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, mlx5e_ipsec_tx_build_eseg(priv, skb, eseg); #endif -#ifdef CONFIG_MLX5_EN_MACSEC +#ifdef CONFIG_MLX5_MACSEC if (unlikely(mlx5e_macsec_skb_is_offload(skb))) mlx5e_macsec_tx_build_eseg(priv->macsec, skb, eseg); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 891d39b4bfd4..7d4ceb9b9c16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -38,8 +38,10 @@ #include <net/netevent.h> #include "en.h" +#include "eswitch.h" #include "ipsec.h" #include "ipsec_rxtx.h" +#include "en_rep.h" #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1 @@ -354,6 +356,12 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_init_limits(sa_entry, attrs); mlx5e_ipsec_init_macs(sa_entry, attrs); + + if (x->encap) { + attrs->encap = true; + attrs->sport = x->encap->encap_sport; + attrs->dport = x->encap->encap_dport; + } } static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, @@ -387,8 +395,25 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } if (x->encap) { - NL_SET_ERR_MSG_MOD(extack, "Encapsulated xfrm state may not be offloaded"); - return -EINVAL; + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation is not supported"); + return -EINVAL; + } + + if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported"); + return -EINVAL; + } + + if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only"); + return -EINVAL; + } + + if (x->props.mode != XFRM_MODE_TRANSPORT) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only"); + return -EINVAL; + } } if 
(!x->aead) { NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead"); @@ -416,9 +441,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } - if (x->sel.proto != IPPROTO_IP && - (x->sel.proto != IPPROTO_UDP || x->xso.dir != XFRM_DEV_OFFLOAD_OUT)) { - NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); + if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP && + x->sel.proto != IPPROTO_TCP) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP"); return -EINVAL; } @@ -646,6 +671,11 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, if (err) goto err_xfrm; + if (!mlx5_eswitch_block_ipsec(priv->mdev)) { + err = -EBUSY; + goto err_xfrm; + } + /* check esn */ if (x->props.flags & XFRM_STATE_ESN) mlx5e_ipsec_update_esn_state(sa_entry); @@ -654,7 +684,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, err = mlx5_ipsec_create_work(sa_entry); if (err) - goto err_xfrm; + goto unblock_ipsec; err = mlx5e_ipsec_create_dwork(sa_entry); if (err) @@ -711,6 +741,8 @@ release_work: if (sa_entry->work) kfree(sa_entry->work->data); kfree(sa_entry->work); +unblock_ipsec: + mlx5_eswitch_unblock_ipsec(priv->mdev); err_xfrm: kfree(sa_entry); NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state"); @@ -740,6 +772,7 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) static void mlx5e_xfrm_free_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) goto sa_entry_free; @@ -756,6 +789,7 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x) if (sa_entry->work) kfree(sa_entry->work->data); kfree(sa_entry->work); + mlx5_eswitch_unblock_ipsec(ipsec->mdev); sa_entry_free: kfree(sa_entry); } @@ -835,6 +869,7 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) goto clear_aso; } + ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv); ret = mlx5e_accel_ipsec_fs_init(ipsec); if (ret) goto err_fs_init; @@ -958,9 +993,10 @@ static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, return -EINVAL; } - if (sel->proto != IPPROTO_IP && - (sel->proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { - NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); + if (x->selector.proto != IPPROTO_IP && + x->selector.proto != IPPROTO_UDP && + x->selector.proto != IPPROTO_TCP) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP"); return -EINVAL; } @@ -1029,6 +1065,11 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, pol_entry->x = x; pol_entry->ipsec = priv->ipsec; + if (!mlx5_eswitch_block_ipsec(priv->mdev)) { + err = -EBUSY; + goto ipsec_busy; + } + mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs); err = mlx5e_accel_ipsec_fs_add_pol(pol_entry); if (err) @@ -1038,6 +1079,8 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, return 0; err_fs: + mlx5_eswitch_unblock_ipsec(priv->mdev); +ipsec_busy: kfree(pol_entry); NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); return err; @@ -1048,6 +1091,7 @@ static void mlx5e_xfrm_del_policy(struct xfrm_policy *x) struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); mlx5e_accel_ipsec_fs_del_pol(pol_entry); + mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev); } static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 4e9887171508..9e7c42c2f77b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -94,13 +94,20 @@ struct mlx5_accel_esp_xfrm_attrs { u8 dir : 2; u8 type : 2; u8 drop : 1; + u8 encap : 1; u8 family; struct mlx5_replay_esn replay_esn; u32 authsize; u32 reqid; struct mlx5_ipsec_lft lft; - u8 smac[ETH_ALEN]; - u8 dmac[ETH_ALEN]; + union { + u8 smac[ETH_ALEN]; + __be16 sport; + }; + union { + u8 dmac[ETH_ALEN]; + __be16 dport; + }; }; enum mlx5_ipsec_cap { @@ -110,6 +117,7 @@ enum mlx5_ipsec_cap { MLX5_IPSEC_CAP_ROCE = 1 << 3, MLX5_IPSEC_CAP_PRIO = 1 << 4, MLX5_IPSEC_CAP_TUNNEL = 1 << 5, + MLX5_IPSEC_CAP_ESPINUDP = 1 << 6, }; struct mlx5e_priv; @@ -135,7 +143,7 @@ struct mlx5e_ipsec_sw_stats { atomic64_t ipsec_tx_drop_trailer; }; -struct mlx5e_ipsec_rx; +struct mlx5e_ipsec_fc; struct mlx5e_ipsec_tx; struct mlx5e_ipsec_work { @@ -161,6 +169,58 @@ struct mlx5e_ipsec_aso { spinlock_t lock; }; +struct mlx5e_ipsec_rx_create_attr { + struct mlx5_flow_namespace *ns; + struct mlx5_ttc_table *ttc; + u32 family; + int prio; + int pol_level; + int sa_level; + int status_level; + enum mlx5_flow_namespace_type chains_ns; +}; + +struct mlx5e_ipsec_ft { + struct mutex mutex; /* Protect changes to this struct */ + struct mlx5_flow_table *pol; + struct mlx5_flow_table *sa; + struct mlx5_flow_table *status; + u32 refcnt; +}; + +struct mlx5e_ipsec_rule { + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_fc *fc; +}; + +struct mlx5e_ipsec_miss { + struct mlx5_flow_group *group; + struct mlx5_flow_handle *rule; +}; + +struct mlx5e_ipsec_rx { + struct mlx5e_ipsec_ft ft; + struct mlx5e_ipsec_miss pol; + struct mlx5e_ipsec_miss sa; + struct mlx5e_ipsec_rule status; + struct mlx5e_ipsec_miss status_drop; + struct mlx5_fc *status_drop_cnt; + struct mlx5e_ipsec_fc *fc; + struct mlx5_fs_chains *chains; + u8 allow_tunnel_mode : 1; + struct xarray ipsec_obj_id_map; +}; + +struct mlx5e_ipsec_tx_create_attr { + int prio; + int pol_level; + int sa_level; + int cnt_level; + enum mlx5_flow_namespace_type chains_ns; +}; + struct mlx5e_ipsec { struct mlx5_core_dev *mdev; struct xarray sadb; @@ -170,11 +230,14 @@ struct mlx5e_ipsec { struct mlx5e_flow_steering *fs; struct mlx5e_ipsec_rx *rx_ipv4; struct mlx5e_ipsec_rx *rx_ipv6; + struct mlx5e_ipsec_rx *rx_esw; struct mlx5e_ipsec_tx *tx; + struct mlx5e_ipsec_tx *tx_esw; struct mlx5e_ipsec_aso *aso; struct notifier_block nb; struct notifier_block netevent_nb; struct mlx5_ipsec_fs *roce; + u8 is_uplink_rep: 1; }; struct mlx5e_ipsec_esn_state { @@ -183,13 +246,6 @@ struct mlx5e_ipsec_esn_state { u8 overlap: 1; }; -struct mlx5e_ipsec_rule { - struct mlx5_flow_handle *rule; - struct mlx5_modify_hdr *modify_hdr; - struct mlx5_pkt_reformat *pkt_reformat; - struct mlx5_fc *fc; -}; - struct mlx5e_ipsec_limits { u64 round; u8 soft_limit_hit : 1; @@ -209,6 +265,7 @@ struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_work *work; struct mlx5e_ipsec_dwork *dwork; struct mlx5e_ipsec_limits limits; + u32 rx_mapped_id; }; struct mlx5_accel_pol_xfrm_attrs { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index dbe87bf89c0d..7dba4221993f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -9,6 +9,8 @@ #include "fs_core.h" #include "lib/ipsec_fs_roce.h" #include "lib/fs_chains.h" +#include "esw/ipsec_fs.h" +#include "en_rep.h" #define NUM_IPSEC_FTE BIT(15) #define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16 @@ -19,32 +21,10 @@ struct mlx5e_ipsec_fc { struct mlx5_fc *drop; }; -struct mlx5e_ipsec_ft { - struct mutex mutex; /* Protect changes to this struct */ - struct mlx5_flow_table *pol; - struct mlx5_flow_table *sa; - struct mlx5_flow_table *status; - u32 refcnt; -}; - -struct mlx5e_ipsec_miss { - struct mlx5_flow_group *group; - struct mlx5_flow_handle *rule; -}; - -struct mlx5e_ipsec_rx { - struct mlx5e_ipsec_ft ft; - struct mlx5e_ipsec_miss pol; - struct mlx5e_ipsec_miss sa; - struct mlx5e_ipsec_rule status; - struct mlx5e_ipsec_fc *fc; - struct mlx5_fs_chains *chains; - u8 allow_tunnel_mode : 1; -}; - struct mlx5e_ipsec_tx { struct mlx5e_ipsec_ft ft; struct mlx5e_ipsec_miss pol; + struct mlx5e_ipsec_miss sa; struct mlx5e_ipsec_rule status; struct mlx5_flow_namespace *ns; struct mlx5e_ipsec_fc *fc; @@ -60,14 +40,25 @@ static enum mlx5_traffic_types family2tt(u32 family) return MLX5_TT_IPV6_IPSEC_ESP; } -static struct mlx5e_ipsec_rx *ipsec_rx(struct mlx5e_ipsec *ipsec, u32 family) +static struct mlx5e_ipsec_rx *ipsec_rx(struct mlx5e_ipsec *ipsec, u32 family, int type) { + if (ipsec->is_uplink_rep && type == XFRM_DEV_OFFLOAD_PACKET) + return ipsec->rx_esw; + if (family == AF_INET) return ipsec->rx_ipv4; return ipsec->rx_ipv6; } +static struct mlx5e_ipsec_tx *ipsec_tx(struct mlx5e_ipsec *ipsec, int type) +{ + if (ipsec->is_uplink_rep && type == XFRM_DEV_OFFLOAD_PACKET) + return ipsec->tx_esw; + + return ipsec->tx; +} + static struct mlx5_fs_chains * ipsec_chains_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *miss_ft, enum mlx5_flow_namespace_type ns, int base_prio, @@ -238,13 +229,19 @@ out: return err; } -static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, u32 family) +static void ipsec_rx_ft_disconnect(struct mlx5e_ipsec *ipsec, u32 family) { struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); - /* disconnect */ mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); +} + +static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) +{ + /* disconnect */ + if (rx != ipsec->rx_esw) + ipsec_rx_ft_disconnect(ipsec, family); if (rx->chains) { ipsec_chains_destroy(rx->chains); @@ -259,51 +256,105 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_destroy_flow_table(rx->ft.sa); if (rx->allow_tunnel_mode) mlx5_eswitch_unblock_encap(mdev); - mlx5_del_flow_rules(rx->status.rule); - mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); + if (rx == ipsec->rx_esw) { + mlx5_esw_ipsec_rx_status_destroy(ipsec, rx); + } else { + mlx5_del_flow_rules(rx->status.rule); + mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); + } mlx5_destroy_flow_table(rx->ft.status); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); } +static void ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + u32 family, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + if (rx == ipsec->rx_esw) { + /* For packet offload in switchdev mode, RX & TX use FDB namespace */ + attr->ns = ipsec->tx_esw->ns; + mlx5_esw_ipsec_rx_create_attr_set(ipsec, attr); + return; + } + + attr->ns = mlx5e_fs_get_ns(ipsec->fs, false); + attr->ttc = mlx5e_fs_get_ttc(ipsec->fs, 
false); + attr->family = family; + attr->prio = MLX5E_NIC_PRIO; + attr->pol_level = MLX5E_ACCEL_FS_POL_FT_LEVEL; + attr->sa_level = MLX5E_ACCEL_FS_ESP_FT_LEVEL; + attr->status_level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL; + attr->chains_ns = MLX5_FLOW_NAMESPACE_KERNEL; +} + +static int ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + int err; + + if (rx == ipsec->rx_esw) + return mlx5_esw_ipsec_rx_status_pass_dest_get(ipsec, dest); + + *dest = mlx5_ttc_get_default_dest(attr->ttc, family2tt(attr->family)); + err = mlx5_ipsec_fs_roce_rx_create(ipsec->mdev, ipsec->roce, attr->ns, dest, + attr->family, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, + attr->prio); + if (err) + return err; + + ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, attr->family); + if (ft) { + dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest->ft = ft; + } + + return 0; +} + +static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + struct mlx5_flow_destination dest = {}; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx->ft.pol; + mlx5_ttc_fwd_dest(attr->ttc, family2tt(attr->family), &dest); +} + static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, u32 family) { - struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(ipsec->fs, false); - struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); - struct mlx5_flow_destination default_dest; + struct mlx5e_ipsec_rx_create_attr attr; struct mlx5_flow_destination dest[2]; struct mlx5_flow_table *ft; u32 flags = 0; int err; - default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family)); - err = mlx5_ipsec_fs_roce_rx_create(mdev, ipsec->roce, ns, &default_dest, - family, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, - MLX5E_NIC_PRIO); + ipsec_rx_create_attr_set(ipsec, rx, family, &attr); + + err = ipsec_rx_status_pass_dest_get(ipsec, rx, &attr, &dest[0]); if (err) return err; - ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, - MLX5E_NIC_PRIO, 1, 0); + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 1, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; } - rx->ft.status = ft; - ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); - if (ft) { - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[0].ft = ft; - } else { - dest[0] = default_dest; - } - dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); - err = ipsec_status_rule(mdev, rx, dest); + if (rx == ipsec->rx_esw) + err = mlx5_esw_ipsec_rx_status_create(ipsec, rx, dest); + else + err = ipsec_status_rule(mdev, rx, dest); if (err) goto err_add; @@ -312,8 +363,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); if (rx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, 2, - flags); + ft = ipsec_ft_create(attr.ns, attr.sa_level, attr.prio, 2, flags); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft; @@ -326,9 +376,9 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { rx->chains = ipsec_chains_create(mdev, rx->ft.sa, - MLX5_FLOW_NAMESPACE_KERNEL, - MLX5E_NIC_PRIO, - MLX5E_ACCEL_FS_POL_FT_LEVEL, + attr.chains_ns, 
+ attr.prio, + attr.pol_level, &rx->ft.pol); if (IS_ERR(rx->chains)) { err = PTR_ERR(rx->chains); @@ -338,8 +388,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, goto connect; } - ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_NIC_PRIO, - 2, 0); + ft = ipsec_ft_create(attr.ns, attr.pol_level, attr.prio, 2, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_pol_ft; @@ -354,10 +403,8 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, connect: /* connect */ - memset(dest, 0x00, sizeof(*dest)); - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[0].ft = rx->ft.pol; - mlx5_ttc_fwd_dest(ttc, family2tt(family), &dest[0]); + if (rx != ipsec->rx_esw) + ipsec_rx_ft_connect(ipsec, rx, &attr); return 0; err_pol_miss: @@ -387,10 +434,16 @@ static int rx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (rx->ft.refcnt) goto skip; - err = rx_create(mdev, ipsec, rx, family); + err = mlx5_eswitch_block_mode(mdev); if (err) return err; + err = rx_create(mdev, ipsec, rx, family); + if (err) { + mlx5_eswitch_unblock_mode(mdev); + return err; + } + skip: rx->ft.refcnt++; return 0; @@ -403,12 +456,14 @@ static void rx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, return; rx_destroy(ipsec->mdev, ipsec, rx, family); + mlx5_eswitch_unblock_mode(ipsec->mdev); } static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev, - struct mlx5e_ipsec *ipsec, u32 family) + struct mlx5e_ipsec *ipsec, u32 family, + int type) { - struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); int err; mutex_lock(&rx->ft.mutex); @@ -422,9 +477,9 @@ static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev, static struct mlx5_flow_table *rx_ft_get_policy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, - u32 family, u32 prio) + u32 family, u32 prio, int type) { - struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); struct mlx5_flow_table *ft; int err; @@ -449,18 +504,18 @@ err_get: return ERR_PTR(err); } -static void rx_ft_put(struct mlx5e_ipsec *ipsec, u32 family) +static void rx_ft_put(struct mlx5e_ipsec *ipsec, u32 family, int type) { - struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); mutex_lock(&rx->ft.mutex); rx_put(ipsec, rx, family); mutex_unlock(&rx->ft.mutex); } -static void rx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 family, u32 prio) +static void rx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 family, u32 prio, int type) { - struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family); + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); mutex_lock(&rx->ft.mutex); if (rx->chains) @@ -504,7 +559,7 @@ err_rule: } /* IPsec TX flow steering */ -static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, +static void tx_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce) { mlx5_ipsec_fs_roce_tx_destroy(roce); @@ -516,22 +571,45 @@ static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, mlx5_destroy_flow_table(tx->ft.pol); } + if (tx == ipsec->tx_esw) { + mlx5_del_flow_rules(tx->sa.rule); + mlx5_destroy_flow_group(tx->sa.group); + } mlx5_destroy_flow_table(tx->ft.sa); if (tx->allow_tunnel_mode) - mlx5_eswitch_unblock_encap(mdev); + mlx5_eswitch_unblock_encap(ipsec->mdev); mlx5_del_flow_rules(tx->status.rule); 
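rx_get()/rx_put() and the rx_ft_get()/rx_ft_put() wrappers above form a refcounted singleton: the tables are built only on the 0 to 1 transition and torn down only on 1 to 0, with the per-object ft.mutex held across the check so concurrent users cannot race the create. A sketch using pthreads in place of the kernel mutex, with create()/destroy() standing in for rx_create() and rx_destroy():

#include <pthread.h>
#include <stdio.h>

struct ft {
	pthread_mutex_t lock;
	unsigned int refcnt;
};

static int create(void)   { puts("create tables");  return 0; }
static void destroy(void) { puts("destroy tables"); }

static int ft_get(struct ft *ft)
{
	int err = 0;

	pthread_mutex_lock(&ft->lock);
	if (!ft->refcnt) {
		err = create();	/* first user builds everything */
		if (err)
			goto out;
	}
	ft->refcnt++;
out:
	pthread_mutex_unlock(&ft->lock);
	return err;
}

static void ft_put(struct ft *ft)
{
	pthread_mutex_lock(&ft->lock);
	if (!--ft->refcnt)
		destroy();	/* last user tears everything down */
	pthread_mutex_unlock(&ft->lock);
}

int main(void)
{
	struct ft ft = { .lock = PTHREAD_MUTEX_INITIALIZER };

	ft_get(&ft);	/* creates */
	ft_get(&ft);	/* refcount only */
	ft_put(&ft);
	ft_put(&ft);	/* destroys */
	return 0;
}

The hunk additionally brackets create/destroy with mlx5_eswitch_block_mode()/mlx5_eswitch_unblock_mode(), pinning the eswitch mode for as long as any reference is held.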
mlx5_destroy_flow_table(tx->ft.status); } -static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, +static void ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx *tx, + struct mlx5e_ipsec_tx_create_attr *attr) +{ + if (tx == ipsec->tx_esw) { + mlx5_esw_ipsec_tx_create_attr_set(ipsec, attr); + return; + } + + attr->prio = 0; + attr->pol_level = 0; + attr->sa_level = 1; + attr->cnt_level = 2; + attr->chains_ns = MLX5_FLOW_NAMESPACE_EGRESS_IPSEC; +} + +static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce) { + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5e_ipsec_tx_create_attr attr; struct mlx5_flow_destination dest = {}; struct mlx5_flow_table *ft; u32 flags = 0; int err; - ft = ipsec_ft_create(tx->ns, 2, 0, 1, 0); + ipsec_tx_create_attr_set(ipsec, tx, &attr); + ft = ipsec_ft_create(tx->ns, attr.cnt_level, attr.prio, 1, 0); if (IS_ERR(ft)) return PTR_ERR(ft); tx->ft.status = ft; @@ -544,16 +622,25 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); if (tx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft = ipsec_ft_create(tx->ns, 1, 0, 4, flags); + ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 4, flags); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_sa_ft; } tx->ft.sa = ft; + if (tx == ipsec->tx_esw) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + err = ipsec_miss_create(mdev, tx->ft.sa, &tx->sa, &dest); + if (err) + goto err_sa_miss; + memset(&dest, 0, sizeof(dest)); + } + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { tx->chains = ipsec_chains_create( - mdev, tx->ft.sa, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC, 0, 0, + mdev, tx->ft.sa, attr.chains_ns, attr.prio, attr.pol_level, &tx->ft.pol); if (IS_ERR(tx->chains)) { err = PTR_ERR(tx->chains); @@ -563,7 +650,7 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, goto connect_roce; } - ft = ipsec_ft_create(tx->ns, 0, 0, 2, 0); + ft = ipsec_ft_create(tx->ns, attr.pol_level, attr.prio, 2, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_pol_ft; @@ -592,6 +679,11 @@ err_roce: mlx5_destroy_flow_table(tx->ft.pol); } err_pol_ft: + if (tx == ipsec->tx_esw) { + mlx5_del_flow_rules(tx->sa.rule); + mlx5_destroy_flow_group(tx->sa.group); + } +err_sa_miss: mlx5_destroy_flow_table(tx->ft.sa); err_sa_ft: if (tx->allow_tunnel_mode) @@ -602,6 +694,25 @@ err_status_rule: return err; } +static void ipsec_esw_tx_ft_policy_set(struct mlx5_core_dev *mdev, + struct mlx5_flow_table *ft) +{ +#ifdef CONFIG_MLX5_ESWITCH + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_priv *priv; + + esw->offloads.ft_ipsec_tx_pol = ft; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + priv = netdev_priv(uplink_rpriv->netdev); + if (!priv->channels.num) + return; + + mlx5e_rep_deactivate_channels(priv); + mlx5e_rep_activate_channels(priv); +#endif +} + static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx) { @@ -610,10 +721,19 @@ static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (tx->ft.refcnt) goto skip; - err = tx_create(mdev, tx, ipsec->roce); + err = mlx5_eswitch_block_mode(mdev); if (err) return err; + err = tx_create(ipsec, tx, ipsec->roce); + if (err) { + mlx5_eswitch_unblock_mode(mdev); + return err; + } + + if (tx == ipsec->tx_esw) + 
ipsec_esw_tx_ft_policy_set(mdev, tx->ft.pol); + skip: tx->ft.refcnt++; return 0; @@ -624,14 +744,20 @@ static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx) if (--tx->ft.refcnt) return; - tx_destroy(ipsec->mdev, tx, ipsec->roce); + if (tx == ipsec->tx_esw) { + mlx5_esw_ipsec_restore_dest_uplink(ipsec->mdev); + ipsec_esw_tx_ft_policy_set(ipsec->mdev, NULL); + } + + tx_destroy(ipsec, tx, ipsec->roce); + mlx5_eswitch_unblock_mode(ipsec->mdev); } static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, - u32 prio) + u32 prio, int type) { - struct mlx5e_ipsec_tx *tx = ipsec->tx; + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); struct mlx5_flow_table *ft; int err; @@ -657,9 +783,9 @@ err_get: } static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev, - struct mlx5e_ipsec *ipsec) + struct mlx5e_ipsec *ipsec, int type) { - struct mlx5e_ipsec_tx *tx = ipsec->tx; + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); int err; mutex_lock(&tx->ft.mutex); @@ -671,18 +797,18 @@ static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev, return tx; } -static void tx_ft_put(struct mlx5e_ipsec *ipsec) +static void tx_ft_put(struct mlx5e_ipsec *ipsec, int type) { - struct mlx5e_ipsec_tx *tx = ipsec->tx; + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); mutex_lock(&tx->ft.mutex); tx_put(ipsec, tx); mutex_unlock(&tx->ft.mutex); } -static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio) +static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio, int type) { - struct mlx5e_ipsec_tx *tx = ipsec->tx; + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); mutex_lock(&tx->ft.mutex); if (tx->chains) @@ -782,43 +908,75 @@ static void setup_fte_reg_a(struct mlx5_flow_spec *spec) misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC); } -static void setup_fte_reg_c0(struct mlx5_flow_spec *spec, u32 reqid) +static void setup_fte_reg_c4(struct mlx5_flow_spec *spec, u32 reqid) { /* Pass policy check before choosing this SA */ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; - MLX5_SET(fte_match_param, spec->match_criteria, - misc_parameters_2.metadata_reg_c_0, reqid); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4); MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.metadata_reg_c_0, reqid); + misc_parameters_2.metadata_reg_c_4, reqid); } static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upspec *upspec) { - if (upspec->proto != IPPROTO_UDP) + switch (upspec->proto) { + case IPPROTO_UDP: + if (upspec->dport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + udp_dport, upspec->dport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + udp_dport, upspec->dport); + } + if (upspec->sport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + udp_sport, upspec->sport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + udp_sport, upspec->sport); + } + break; + case IPPROTO_TCP: + if (upspec->dport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + tcp_dport, upspec->dport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + tcp_dport, upspec->dport); + } + if (upspec->sport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + tcp_sport, upspec->sport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + tcp_sport, upspec->sport); + } + break; + default: return; + } spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; 
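setup_fte_upper_proto_match() now switches on the L4 protocol before touching any port field, so UDP and TCP selectors each land in their own match fields; the removed lines just below also show the old defect of writing the source port into udp_dport. A reduced model of the rework over a plain struct (field names are stand-ins for the fte_match fields):

#include <netinet/in.h>		/* IPPROTO_UDP, IPPROTO_TCP */
#include <stdint.h>
#include <stdio.h>

struct match {
	uint16_t udp_dport, udp_sport, tcp_dport, tcp_sport;
	uint8_t proto;
};

static void set_upper_proto(struct match *m, uint8_t proto,
			    uint16_t sport, uint16_t dport)
{
	switch (proto) {
	case IPPROTO_UDP:
		if (dport)
			m->udp_dport = dport;
		if (sport)
			m->udp_sport = sport;	/* sport now hits the sport field */
		break;
	case IPPROTO_TCP:
		if (dport)
			m->tcp_dport = dport;
		if (sport)
			m->tcp_sport = sport;
		break;
	default:
		return;			/* no L4 port match for other protocols */
	}
	m->proto = proto;		/* only set once ports were matched */
}

int main(void)
{
	struct match m = { 0 };

	set_upper_proto(&m, IPPROTO_TCP, 0, 4500);
	printf("proto=%u tcp_dport=%u\n", m.proto, m.tcp_dport);
	return 0;
}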
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, spec->match_criteria, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, ip_protocol, upspec->proto); - if (upspec->dport) { - MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport, - upspec->dport_mask); - MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->dport); - } +} - if (upspec->sport) { - MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport, - upspec->sport_mask); - MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport); - } +static enum mlx5_flow_namespace_type ipsec_fs_get_ns(struct mlx5e_ipsec *ipsec, + int type, u8 dir) +{ + if (ipsec->is_uplink_rep && type == XFRM_DEV_OFFLOAD_PACKET) + return MLX5_FLOW_NAMESPACE_FDB; + + if (dir == XFRM_DEV_OFFLOAD_IN) + return MLX5_FLOW_NAMESPACE_KERNEL; + + return MLX5_FLOW_NAMESPACE_EGRESS; } -static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir, +static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8 dir, struct mlx5_flow_act *flow_act) { + enum mlx5_flow_namespace_type ns_type = ipsec_fs_get_ns(ipsec, type, dir); u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; - enum mlx5_flow_namespace_type ns_type; + struct mlx5_core_dev *mdev = ipsec->mdev; struct mlx5_modify_hdr *modify_hdr; MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); @@ -826,12 +984,10 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir, case XFRM_DEV_OFFLOAD_IN: MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - ns_type = MLX5_FLOW_NAMESPACE_KERNEL; break; case XFRM_DEV_OFFLOAD_OUT: MLX5_SET(set_action_in, action, field, - MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); - ns_type = MLX5_FLOW_NAMESPACE_EGRESS; + MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); break; default: return -EINVAL; @@ -951,37 +1107,70 @@ free_reformatbf: return -EINVAL; } +static int get_reformat_type(struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + if (attrs->encap) + return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP; + return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT; + case XFRM_DEV_OFFLOAD_OUT: + if (attrs->family == AF_INET) { + if (attrs->encap) + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4; + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4; + } + + if (attrs->encap) + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6; + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6; + default: + WARN_ON(true); + } + + return -EINVAL; +} + static int setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs, struct mlx5_pkt_reformat_params *reformat_params) { - u8 *reformatbf; + struct udphdr *udphdr; + char *reformatbf; + size_t bfflen; __be32 spi; + void *hdr; + + reformat_params->type = get_reformat_type(attrs); + if (reformat_params->type < 0) + return reformat_params->type; switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: - reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT; break; case XFRM_DEV_OFFLOAD_OUT: - if (attrs->family == AF_INET) - reformat_params->type = - MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4; - else - reformat_params->type = - MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6; - - reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE, - GFP_KERNEL); + bfflen = MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE; + if (attrs->encap) + bfflen += sizeof(*udphdr); + + reformatbf = kzalloc(bfflen, GFP_KERNEL); if (!reformatbf) return -ENOMEM; + hdr = reformatbf; + if 
(attrs->encap) { + udphdr = (struct udphdr *)reformatbf; + udphdr->source = attrs->sport; + udphdr->dest = attrs->dport; + hdr += sizeof(*udphdr); + } + /* convert to network format */ spi = htonl(attrs->spi); - memcpy(reformatbf, &spi, sizeof(spi)); + memcpy(hdr, &spi, sizeof(spi)); reformat_params->param_0 = attrs->authsize; - reformat_params->size = - MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE; + reformat_params->size = bfflen; reformat_params->data = reformatbf; break; default: @@ -991,26 +1180,17 @@ setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs, return 0; } -static int setup_pkt_reformat(struct mlx5_core_dev *mdev, +static int setup_pkt_reformat(struct mlx5e_ipsec *ipsec, struct mlx5_accel_esp_xfrm_attrs *attrs, struct mlx5_flow_act *flow_act) { + enum mlx5_flow_namespace_type ns_type = ipsec_fs_get_ns(ipsec, attrs->type, + attrs->dir); struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_core_dev *mdev = ipsec->mdev; struct mlx5_pkt_reformat *pkt_reformat; - enum mlx5_flow_namespace_type ns_type; int ret; - switch (attrs->dir) { - case XFRM_DEV_OFFLOAD_IN: - ns_type = MLX5_FLOW_NAMESPACE_KERNEL; - break; - case XFRM_DEV_OFFLOAD_OUT: - ns_type = MLX5_FLOW_NAMESPACE_EGRESS; - break; - default: - return -EINVAL; - } - switch (attrs->mode) { case XFRM_MODE_TRANSPORT: ret = setup_pkt_transport_reformat(attrs, &reformat_params); @@ -1047,9 +1227,9 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5_flow_spec *spec; struct mlx5e_ipsec_rx *rx; struct mlx5_fc *counter; - int err; + int err = 0; - rx = rx_ft_get(mdev, ipsec, attrs->family); + rx = rx_ft_get(mdev, ipsec, attrs->family, attrs->type); if (IS_ERR(rx)) return PTR_ERR(rx); @@ -1067,15 +1247,21 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) setup_fte_spi(spec, attrs->spi); setup_fte_esp(spec); setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); + + if (rx != ipsec->rx_esw) + err = setup_modify_header(ipsec, attrs->type, + sa_entry->ipsec_obj_id | BIT(31), + XFRM_DEV_OFFLOAD_IN, &flow_act); + else + err = mlx5_esw_ipsec_rx_setup_modify_header(sa_entry, &flow_act); - err = setup_modify_header(mdev, sa_entry->ipsec_obj_id | BIT(31), - XFRM_DEV_OFFLOAD_IN, &flow_act); if (err) goto err_mod_header; switch (attrs->type) { case XFRM_DEV_OFFLOAD_PACKET: - err = setup_pkt_reformat(mdev, attrs, &flow_act); + err = setup_pkt_reformat(ipsec, attrs, &flow_act); if (err) goto err_pkt_reformat; break; @@ -1125,7 +1311,7 @@ err_pkt_reformat: err_mod_header: kvfree(spec); err_alloc: - rx_ft_put(ipsec, attrs->family); + rx_ft_put(ipsec, attrs->family, attrs->type); return err; } @@ -1142,7 +1328,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5_fc *counter; int err; - tx = tx_ft_get(mdev, ipsec); + tx = tx_ft_get(mdev, ipsec, attrs->type); if (IS_ERR(tx)) return PTR_ERR(tx); @@ -1168,8 +1354,8 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) break; case XFRM_DEV_OFFLOAD_PACKET: if (attrs->reqid) - setup_fte_reg_c0(spec, attrs->reqid); - err = setup_pkt_reformat(mdev, attrs, &flow_act); + setup_fte_reg_c4(spec, attrs->reqid); + err = setup_pkt_reformat(ipsec, attrs, &flow_act); if (err) goto err_pkt_reformat; break; @@ -1218,7 +1404,7 @@ err_add_cnt: err_pkt_reformat: kvfree(spec); err_alloc: - tx_ft_put(ipsec); + tx_ft_put(ipsec, attrs->type); return err; } @@ -1226,15 +1412,16 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) { struct mlx5_accel_pol_xfrm_attrs *attrs = 
&pol_entry->attrs; struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry); - struct mlx5e_ipsec_tx *tx = pol_entry->ipsec->tx; + struct mlx5e_ipsec *ipsec = pol_entry->ipsec; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; + struct mlx5e_ipsec_tx *tx; int err, dstn = 0; - ft = tx_ft_get_policy(mdev, pol_entry->ipsec, attrs->prio); + ft = tx_ft_get_policy(mdev, ipsec, attrs->prio, attrs->type); if (IS_ERR(ft)) return PTR_ERR(ft); @@ -1244,6 +1431,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) goto err_alloc; } + tx = ipsec_tx(ipsec, attrs->type); if (attrs->family == AF_INET) setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); else @@ -1258,7 +1446,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) if (!attrs->reqid) break; - err = setup_modify_header(mdev, attrs->reqid, + err = setup_modify_header(ipsec, attrs->type, attrs->reqid, XFRM_DEV_OFFLOAD_OUT, &flow_act); if (err) goto err_mod_header; @@ -1277,6 +1465,8 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) } flow_act.flags |= FLOW_ACT_NO_APPEND; + if (tx == ipsec->tx_esw && tx->chains) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[dstn].ft = tx->ft.sa; dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dstn++; @@ -1298,7 +1488,7 @@ err_action: err_mod_header: kvfree(spec); err_alloc: - tx_ft_put_policy(pol_entry->ipsec, attrs->prio); + tx_ft_put_policy(ipsec, attrs->prio, attrs->type); return err; } @@ -1306,6 +1496,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) { struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs; struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry); + struct mlx5e_ipsec *ipsec = pol_entry->ipsec; struct mlx5_flow_destination dest[2]; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; @@ -1314,11 +1505,12 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) struct mlx5e_ipsec_rx *rx; int err, dstn = 0; - ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->family, attrs->prio); + ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->family, attrs->prio, + attrs->type); if (IS_ERR(ft)) return PTR_ERR(ft); - rx = ipsec_rx(pol_entry->ipsec, attrs->family); + rx = ipsec_rx(pol_entry->ipsec, attrs->family, attrs->type); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -1332,6 +1524,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); switch (attrs->action) { case XFRM_POLICY_ALLOW: @@ -1350,6 +1543,8 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) } flow_act.flags |= FLOW_ACT_NO_APPEND; + if (rx == ipsec->rx_esw && rx->chains) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dstn].ft = rx->ft.sa; dstn++; @@ -1367,88 +1562,110 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) err_action: kvfree(spec); err_alloc: - rx_ft_put_policy(pol_entry->ipsec, attrs->family, attrs->prio); + rx_ft_put_policy(pol_entry->ipsec, attrs->family, attrs->prio, attrs->type); return err; } +static void ipsec_fs_destroy_single_counter(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec_fc *fc) +{ + mlx5_fc_destroy(mdev, fc->drop); + mlx5_fc_destroy(mdev, fc->cnt); + kfree(fc); +} + static void 
ipsec_fs_destroy_counters(struct mlx5e_ipsec *ipsec) { - struct mlx5e_ipsec_rx *rx_ipv4 = ipsec->rx_ipv4; struct mlx5_core_dev *mdev = ipsec->mdev; - struct mlx5e_ipsec_tx *tx = ipsec->tx; - mlx5_fc_destroy(mdev, tx->fc->drop); - mlx5_fc_destroy(mdev, tx->fc->cnt); - kfree(tx->fc); - mlx5_fc_destroy(mdev, rx_ipv4->fc->drop); - mlx5_fc_destroy(mdev, rx_ipv4->fc->cnt); - kfree(rx_ipv4->fc); + ipsec_fs_destroy_single_counter(mdev, ipsec->tx->fc); + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_ipv4->fc); + if (ipsec->is_uplink_rep) { + ipsec_fs_destroy_single_counter(mdev, ipsec->tx_esw->fc); + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_esw->fc); + } } -static int ipsec_fs_init_counters(struct mlx5e_ipsec *ipsec) +static struct mlx5e_ipsec_fc *ipsec_fs_init_single_counter(struct mlx5_core_dev *mdev) { - struct mlx5e_ipsec_rx *rx_ipv4 = ipsec->rx_ipv4; - struct mlx5e_ipsec_rx *rx_ipv6 = ipsec->rx_ipv6; - struct mlx5_core_dev *mdev = ipsec->mdev; - struct mlx5e_ipsec_tx *tx = ipsec->tx; struct mlx5e_ipsec_fc *fc; struct mlx5_fc *counter; int err; - fc = kzalloc(sizeof(*rx_ipv4->fc), GFP_KERNEL); + fc = kzalloc(sizeof(*fc), GFP_KERNEL); if (!fc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - /* Both IPv4 and IPv6 point to same flow counters struct. */ - rx_ipv4->fc = fc; - rx_ipv6->fc = fc; counter = mlx5_fc_create(mdev, false); if (IS_ERR(counter)) { err = PTR_ERR(counter); - goto err_rx_cnt; + goto err_cnt; } - fc->cnt = counter; + counter = mlx5_fc_create(mdev, false); if (IS_ERR(counter)) { err = PTR_ERR(counter); - goto err_rx_drop; + goto err_drop; } - fc->drop = counter; - fc = kzalloc(sizeof(*tx->fc), GFP_KERNEL); - if (!fc) { - err = -ENOMEM; - goto err_tx_fc; + + return fc; + +err_drop: + mlx5_fc_destroy(mdev, fc->cnt); +err_cnt: + kfree(fc); + return ERR_PTR(err); +} + +static int ipsec_fs_init_counters(struct mlx5e_ipsec *ipsec) +{ + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5e_ipsec_fc *fc; + int err; + + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto err_rx_cnt; } + ipsec->rx_ipv4->fc = fc; - tx->fc = fc; - counter = mlx5_fc_create(mdev, false); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); goto err_tx_cnt; } + ipsec->tx->fc = fc; - fc->cnt = counter; - counter = mlx5_fc_create(mdev, false); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_tx_drop; + if (ipsec->is_uplink_rep) { + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto err_rx_esw_cnt; + } + ipsec->rx_esw->fc = fc; + + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto err_tx_esw_cnt; + } + ipsec->tx_esw->fc = fc; } - fc->drop = counter; + /* Both IPv4 and IPv6 point to same flow counters struct. 
*/ + ipsec->rx_ipv6->fc = ipsec->rx_ipv4->fc; return 0; -err_tx_drop: - mlx5_fc_destroy(mdev, tx->fc->cnt); +err_tx_esw_cnt: + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_esw->fc); +err_rx_esw_cnt: + ipsec_fs_destroy_single_counter(mdev, ipsec->tx->fc); err_tx_cnt: - kfree(tx->fc); -err_tx_fc: - mlx5_fc_destroy(mdev, rx_ipv4->fc->drop); -err_rx_drop: - mlx5_fc_destroy(mdev, rx_ipv4->fc->cnt); + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_ipv4->fc); err_rx_cnt: - kfree(rx_ipv4->fc); return err; } @@ -1458,6 +1675,7 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) struct mlx5e_ipsec *ipsec = priv->ipsec; struct mlx5e_ipsec_hw_stats *stats; struct mlx5e_ipsec_fc *fc; + u64 packets, bytes; stats = (struct mlx5e_ipsec_hw_stats *)ipsec_stats; @@ -1479,14 +1697,94 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_tx_pkts, &stats->ipsec_tx_bytes); mlx5_fc_query(mdev, fc->drop, &stats->ipsec_tx_drop_pkts, &stats->ipsec_tx_drop_bytes); + + if (ipsec->is_uplink_rep) { + fc = ipsec->rx_esw->fc; + if (!mlx5_fc_query(mdev, fc->cnt, &packets, &bytes)) { + stats->ipsec_rx_pkts += packets; + stats->ipsec_rx_bytes += bytes; + } + + if (!mlx5_fc_query(mdev, fc->drop, &packets, &bytes)) { + stats->ipsec_rx_drop_pkts += packets; + stats->ipsec_rx_drop_bytes += bytes; + } + + fc = ipsec->tx_esw->fc; + if (!mlx5_fc_query(mdev, fc->cnt, &packets, &bytes)) { + stats->ipsec_tx_pkts += packets; + stats->ipsec_tx_bytes += bytes; + } + + if (!mlx5_fc_query(mdev, fc->drop, &packets, &bytes)) { + stats->ipsec_tx_drop_pkts += packets; + stats->ipsec_tx_drop_bytes += bytes; + } + } +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int err = 0; + + if (esw) + down_write(&esw->mode_lock); + + if (mdev->num_block_ipsec) { + err = -EBUSY; + goto unlock; + } + + mdev->num_block_tc++; + +unlock: + if (esw) + up_write(&esw->mode_lock); + + return err; +} +#else +static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) +{ + if (mdev->num_block_ipsec) + return -EBUSY; + + mdev->num_block_tc++; + return 0; +} +#endif + +static void mlx5e_ipsec_unblock_tc_offload(struct mlx5_core_dev *mdev) +{ + mdev->num_block_tc--; } int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) { + int err; + + if (sa_entry->attrs.type == XFRM_DEV_OFFLOAD_PACKET) { + err = mlx5e_ipsec_block_tc_offload(sa_entry->ipsec->mdev); + if (err) + return err; + } + if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) - return tx_add_rule(sa_entry); + err = tx_add_rule(sa_entry); + else + err = rx_add_rule(sa_entry); + + if (err) + goto err_out; - return rx_add_rule(sa_entry); + return 0; + +err_out: + if (sa_entry->attrs.type == XFRM_DEV_OFFLOAD_PACKET) + mlx5e_ipsec_unblock_tc_offload(sa_entry->ipsec->mdev); + return err; } void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) @@ -1499,21 +1797,40 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) if (ipsec_rule->pkt_reformat) mlx5_packet_reformat_dealloc(mdev, ipsec_rule->pkt_reformat); + if (sa_entry->attrs.type == XFRM_DEV_OFFLOAD_PACKET) + mlx5e_ipsec_unblock_tc_offload(mdev); + if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) { - tx_ft_put(sa_entry->ipsec); + tx_ft_put(sa_entry->ipsec, sa_entry->attrs.type); return; } mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); - rx_ft_put(sa_entry->ipsec,
sa_entry->attrs.family); + mlx5_esw_ipsec_rx_id_mapping_remove(sa_entry); + rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family, sa_entry->attrs.type); } int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry) { + int err; + + err = mlx5e_ipsec_block_tc_offload(pol_entry->ipsec->mdev); + if (err) + return err; + if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) - return tx_add_policy(pol_entry); + err = tx_add_policy(pol_entry); + else + err = rx_add_policy(pol_entry); + + if (err) + goto err_out; - return rx_add_policy(pol_entry); + return 0; + +err_out: + mlx5e_ipsec_unblock_tc_offload(pol_entry->ipsec->mdev); + return err; } void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry) @@ -1523,16 +1840,18 @@ void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry) mlx5_del_flow_rules(ipsec_rule->rule); + mlx5e_ipsec_unblock_tc_offload(pol_entry->ipsec->mdev); + if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { rx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.family, - pol_entry->attrs.prio); + pol_entry->attrs.prio, pol_entry->attrs.type); return; } if (ipsec_rule->modify_hdr) mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); - tx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.prio); + tx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.prio, pol_entry->attrs.type); } void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) @@ -1540,7 +1859,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) if (!ipsec->tx) return; - if (mlx5_ipsec_device_caps(ipsec->mdev) & MLX5_IPSEC_CAP_ROCE) + if (ipsec->roce) mlx5_ipsec_fs_roce_cleanup(ipsec->roce); ipsec_fs_destroy_counters(ipsec); @@ -1555,12 +1874,24 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) mutex_destroy(&ipsec->rx_ipv6->ft.mutex); WARN_ON(ipsec->rx_ipv6->ft.refcnt); kfree(ipsec->rx_ipv6); + + if (ipsec->is_uplink_rep) { + xa_destroy(&ipsec->rx_esw->ipsec_obj_id_map); + + mutex_destroy(&ipsec->tx_esw->ft.mutex); + WARN_ON(ipsec->tx_esw->ft.refcnt); + kfree(ipsec->tx_esw); + + mutex_destroy(&ipsec->rx_esw->ft.mutex); + WARN_ON(ipsec->rx_esw->ft.refcnt); + kfree(ipsec->rx_esw); + } } int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) { struct mlx5_core_dev *mdev = ipsec->mdev; - struct mlx5_flow_namespace *ns; + struct mlx5_flow_namespace *ns, *ns_esw; int err = -ENOMEM; ns = mlx5_get_flow_namespace(ipsec->mdev, @@ -1568,9 +1899,23 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) if (!ns) return -EOPNOTSUPP; + if (ipsec->is_uplink_rep) { + ns_esw = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns_esw) + return -EOPNOTSUPP; + + ipsec->tx_esw = kzalloc(sizeof(*ipsec->tx_esw), GFP_KERNEL); + if (!ipsec->tx_esw) + return -ENOMEM; + + ipsec->rx_esw = kzalloc(sizeof(*ipsec->rx_esw), GFP_KERNEL); + if (!ipsec->rx_esw) + goto err_rx_esw; + } + ipsec->tx = kzalloc(sizeof(*ipsec->tx), GFP_KERNEL); if (!ipsec->tx) - return -ENOMEM; + goto err_tx; ipsec->rx_ipv4 = kzalloc(sizeof(*ipsec->rx_ipv4), GFP_KERNEL); if (!ipsec->rx_ipv4) @@ -1589,8 +1934,14 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) mutex_init(&ipsec->rx_ipv6->ft.mutex); ipsec->tx->ns = ns; - if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) + if (ipsec->is_uplink_rep) { + mutex_init(&ipsec->tx_esw->ft.mutex); + mutex_init(&ipsec->rx_esw->ft.mutex); + ipsec->tx_esw->ns = ns_esw; + xa_init_flags(&ipsec->rx_esw->ipsec_obj_id_map, XA_FLAGS_ALLOC1); + } else if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) { ipsec->roce = 
mlx5_ipsec_fs_roce_init(mdev); + } return 0; @@ -1600,6 +1951,10 @@ err_rx_ipv6: kfree(ipsec->rx_ipv4); err_rx_ipv4: kfree(ipsec->tx); +err_tx: + kfree(ipsec->rx_esw); +err_rx_esw: + kfree(ipsec->tx_esw); return err; } @@ -1621,10 +1976,12 @@ void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry) bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry) { - struct mlx5e_ipsec_rx *rx = - ipsec_rx(sa_entry->ipsec, sa_entry->attrs.family); - struct mlx5e_ipsec_tx *tx = sa_entry->ipsec->tx; + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5e_ipsec_rx *rx; + struct mlx5e_ipsec_tx *tx; + rx = ipsec_rx(sa_entry->ipsec, attrs->family, attrs->type); + tx = ipsec_tx(sa_entry->ipsec, attrs->type); if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) return tx->allow_tunnel_mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index a3554bde3e07..3245d1c9d539 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -45,8 +45,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; - if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) + if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) caps |= MLX5_IPSEC_CAP_PRIO; if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, @@ -54,6 +55,12 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_l3_esp_tunnel_to_l2)) caps |= MLX5_IPSEC_CAP_TUNNEL; + + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, + reformat_add_esp_transport_over_udp) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + reformat_del_esp_transport_over_udp)) + caps |= MLX5_IPSEC_CAP_ESPINUDP; } if (mlx5_get_roce_state(mdev) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index eab5bc718771..51a144246ea6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -37,6 +37,7 @@ #include "ipsec.h" #include "ipsec_rxtx.h" #include "en.h" +#include "esw/ipsec_fs.h" enum { MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, @@ -58,7 +59,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) trailer_len = alen + plen + 2; - pskb_trim(skb, skb->len - trailer_len); + ret = pskb_trim(skb, skb->len - trailer_len); + if (unlikely(ret)) + return ret; if (skb->protocol == htons(ETH_P_IP)) { ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); ip_send_check(ipv4hdr); @@ -309,9 +312,8 @@ enum { void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, - struct mlx5_cqe64 *cqe) + u32 ipsec_meta_data) { - u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata); struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_ipsec *ipsec = priv->ipsec; struct mlx5e_ipsec_sa_entry *sa_entry; @@ -356,3 +358,24 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_syndrome); } } + +int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + u32 ipsec_obj_id; + 
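/* Editorial sketch, not part of the diff: the metadata word built at the
 * end of this function packs the IPsec object id into bits 0-23 and the
 * syndrome into bits 24-29, per the accessors added to ipsec_rxtx.h in
 * this series (bit 31 is the marker, set via the modify-header in
 * rx_add_rule()):
 *
 *	meta = MLX5_IPSEC_METADATA_CREATE(obj_id, syndrome);
 *	obj_id   == MLX5_IPSEC_METADATA_HANDLE(meta);
 *	syndrome == MLX5_IPSEC_METADATA_SYNDROM(meta);
 */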
int err; + + if (!ipsec || !ipsec->is_uplink_rep) + return -EINVAL; + + err = mlx5_esw_ipsec_rx_ipsec_obj_id_search(priv, id, &ipsec_obj_id); + if (err) { + atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + return err; + } + + *metadata = MLX5_IPSEC_METADATA_CREATE(ipsec_obj_id, + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 1878a70b9031..9ee014a8ad24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -43,6 +43,7 @@ #define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1) #define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0)) #define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0)) +#define MLX5_IPSEC_METADATA_CREATE(id, syndrome) ((id) | ((syndrome) << 24)) struct mlx5e_accel_tx_ipsec_state { struct xfrm_offload *xo; @@ -66,7 +67,8 @@ void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, struct mlx5_wqe_inline_seg *inlseg); void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, - struct mlx5_cqe64 *cqe); + u32 ipsec_meta_data); +int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata); static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st) { return ipsec_st->tailen; @@ -145,7 +147,7 @@ mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, static inline void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, - struct mlx5_cqe64 *cqe) + u32 ipsec_meta_data) {} static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index cf704f106b7c..984fa04bd331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -188,7 +188,6 @@ static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init(struct mlx5e_priv *priv) { - struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls; if (!mlx5e_is_ktls_device(priv->mdev)) @@ -199,12 +198,6 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv) return -ENOMEM; tls->mdev = priv->mdev; - dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); - if (IS_ERR(dek_pool)) { - kfree(tls); - return PTR_ERR(dek_pool); - } - tls->dek_pool = dek_pool; priv->tls = tls; mlx5e_tls_debugfs_init(tls, priv->dfs_root); @@ -222,7 +215,6 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) debugfs_remove_recursive(tls->debugfs.dfs); tls->debugfs.dfs = NULL; - mlx5_crypto_dek_pool_destroy(tls->dek_pool); kfree(priv->tls); priv->tls = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index efb2cf74ad6a..d61be26a4df1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -908,28 +908,51 @@ static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls, int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) { + struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls = priv->tls; + int err; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + /* DEK pool could be used by either or both of TX 
and RX. But we have to + * put the creation here to avoid syndrome when doing devlink reload. + */ + dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); + if (IS_ERR(dek_pool)) + return PTR_ERR(dek_pool); + tls->dek_pool = dek_pool; if (!mlx5e_is_ktls_tx(priv->mdev)) return 0; priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); - if (!priv->tls->tx_pool) - return -ENOMEM; + if (!priv->tls->tx_pool) { + err = -ENOMEM; + goto err_tx_pool_init; + } mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs); return 0; + +err_tx_pool_init: + mlx5_crypto_dek_pool_destroy(dek_pool); + return err; } void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) { if (!mlx5e_is_ktls_tx(priv->mdev)) - return; + goto dek_pool_destroy; debugfs_remove_recursive(priv->tls->debugfs.dfs_tx); priv->tls->debugfs.dfs_tx = NULL; mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); priv->tls->tx_pool = NULL; + +dek_pool_destroy: + if (mlx5e_is_ktls_device(priv->mdev)) + mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 592b165530ff..c9c1db971652 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -10,7 +10,6 @@ #include "lib/aso.h" #include "lib/crypto.h" #include "en_accel/macsec.h" -#include "en_accel/macsec_fs.h" #define MLX5_MACSEC_EPN_SCOPE_MID 0x80000000L #define MLX5E_MACSEC_ASO_CTX_SZ MLX5_ST_SZ_BYTES(macsec_aso) @@ -66,9 +65,7 @@ struct mlx5e_macsec_sa { ssci_t ssci; salt_t salt; - struct rhash_head hash; - u32 fs_id; - union mlx5e_macsec_rule *macsec_rule; + union mlx5_macsec_rule *macsec_rule; struct rcu_head rcu_head; struct mlx5e_macsec_epn_state epn_state; }; @@ -106,14 +103,6 @@ struct mlx5e_macsec_aso { u32 pdn; }; -static const struct rhashtable_params rhash_sci = { - .key_len = sizeof_field(struct mlx5e_macsec_sa, sci), - .key_offset = offsetof(struct mlx5e_macsec_sa, sci), - .head_offset = offsetof(struct mlx5e_macsec_sa, hash), - .automatic_shrinking = true, - .min_size = 1, -}; - struct mlx5e_macsec_device { const struct net_device *netdev; struct mlx5e_macsec_sa *tx_sa[MACSEC_NUM_AN]; @@ -125,20 +114,13 @@ struct mlx5e_macsec_device { struct mlx5e_macsec { struct list_head macsec_device_list_head; int num_of_devices; - struct mlx5e_macsec_fs *macsec_fs; struct mutex lock; /* Protects mlx5e_macsec internal contexts */ - /* Tx sci -> fs id mapping handling */ - struct rhashtable sci_hash; /* sci -> mlx5e_macsec_sa */ - /* Rx fs_id -> rx_sc mapping */ struct xarray sc_xarray; struct mlx5_core_dev *mdev; - /* Stats manage */ - struct mlx5e_macsec_stats stats; - /* ASO */ struct mlx5e_macsec_aso aso; @@ -330,36 +312,30 @@ static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_o static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, struct mlx5e_macsec_sa *sa, - bool is_tx) + bool is_tx, struct net_device *netdev, u32 fs_id) { int action = (is_tx) ? 
MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : MLX5_ACCEL_MACSEC_ACTION_DECRYPT; - if ((is_tx) && sa->fs_id) { - /* Make sure ongoing datapath readers sees a valid SA */ - rhashtable_remove_fast(&macsec->sci_hash, &sa->hash, rhash_sci); - sa->fs_id = 0; - } - if (!sa->macsec_rule) return; - mlx5e_macsec_fs_del_rule(macsec->macsec_fs, sa->macsec_rule, action); + mlx5_macsec_fs_del_rule(macsec->mdev->macsec_fs, sa->macsec_rule, action, netdev, + fs_id); mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); sa->macsec_rule = NULL; } static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5e_macsec_sa *sa, - bool encrypt, - bool is_tx) + bool encrypt, bool is_tx, u32 *fs_id) { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; struct mlx5_macsec_rule_attrs rule_attrs; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; - union mlx5e_macsec_rule *macsec_rule; + union mlx5_macsec_rule *macsec_rule; int err; obj_attrs.next_pn = sa->next_pn; @@ -387,7 +363,7 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : MLX5_ACCEL_MACSEC_ACTION_DECRYPT; - macsec_rule = mlx5e_macsec_fs_add_rule(macsec->macsec_fs, ctx, &rule_attrs, &sa->fs_id); + macsec_rule = mlx5_macsec_fs_add_rule(mdev->macsec_fs, ctx, &rule_attrs, fs_id); if (!macsec_rule) { err = -ENOMEM; goto destroy_macsec_object; @@ -395,16 +371,8 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, sa->macsec_rule = macsec_rule; - if (is_tx) { - err = rhashtable_insert_fast(&macsec->sci_hash, &sa->hash, rhash_sci); - if (err) - goto destroy_macsec_object_and_rule; - } - return 0; -destroy_macsec_object_and_rule: - mlx5e_macsec_cleanup_sa(macsec, sa, is_tx); destroy_macsec_object: mlx5e_macsec_destroy_object(mdev, sa->macsec_obj_id); @@ -426,7 +394,7 @@ mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) static int macsec_rx_sa_active_update(struct macsec_context *ctx, struct mlx5e_macsec_sa *rx_sa, - bool active) + bool active, u32 *fs_id) { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; @@ -437,11 +405,11 @@ static int macsec_rx_sa_active_update(struct macsec_context *ctx, rx_sa->active = active; if (!active) { - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, *fs_id); return 0; } - err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, fs_id); if (err) rx_sa->active = false; @@ -563,7 +531,7 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) !tx_sa->active) goto out; - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto destroy_encryption_key; @@ -627,7 +595,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; if (ctx_tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } else { @@ -636,7 +604,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); } out: mutex_unlock(&macsec->lock); @@ -669,7 +637,7 @@ static int mlx5e_macsec_del_txsa(struct macsec_context *ctx) goto out; } - 
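/* Editorial aside, not part of the diff: mlx5e_macsec_cleanup_sa() gains
 * netdev and fs_id arguments because the sci->fs_id rhashtable moved into
 * the shared mlx5_macsec_fs layer. TX call sites pass fs_id 0, as in the
 * hunk below; RX call sites pass the id cached in the SC's xarray
 * element, e.g.:
 *
 *	mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev,
 *				rx_sc->sc_xarray_element->fs_id);
 */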
mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); kfree_rcu_mightsleep(tx_sa); macsec_device->tx_sa[assoc_num] = NULL; @@ -680,20 +648,6 @@ out: return err; } -static u32 mlx5e_macsec_get_sa_from_hashtable(struct rhashtable *sci_hash, sci_t *sci) -{ - struct mlx5e_macsec_sa *macsec_sa; - u32 fs_id = 0; - - rcu_read_lock(); - macsec_sa = rhashtable_lookup(sci_hash, sci, rhash_sci); - if (macsec_sa) - fs_id = macsec_sa->fs_id; - rcu_read_unlock(); - - return fs_id; -} - static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) { struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; @@ -813,7 +767,8 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) if (!rx_sa) continue; - err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active, + &rx_sc->sc_xarray_element->fs_id); if (err) goto out; } @@ -824,7 +779,8 @@ out: return err; } -static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc) +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc, + struct net_device *netdev) { struct mlx5e_macsec_sa *rx_sa; int i; @@ -834,7 +790,8 @@ static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec if (!rx_sa) continue; - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, netdev, + rx_sc->sc_xarray_element->fs_id); mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); kfree(rx_sa); @@ -882,7 +839,7 @@ static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) goto out; } - macsec_del_rxsc_ctx(macsec, rx_sc); + macsec_del_rxsc_ctx(macsec, rx_sc, ctx->secy->netdev); out: mutex_unlock(&macsec->lock); @@ -941,7 +898,6 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) rx_sa->next_pn = ctx_rx_sa->next_pn; rx_sa->sci = sci; rx_sa->assoc_num = assoc_num; - rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; if (ctx->secy->xpn) update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, @@ -958,7 +914,7 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) goto out; //TODO - add support for both authentication and encryption flows - err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, &rx_sc->sc_xarray_element->fs_id); if (err) goto destroy_encryption_key; @@ -1025,7 +981,8 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active, + &rx_sc->sc_xarray_element->fs_id); out: mutex_unlock(&macsec->lock); @@ -1073,7 +1030,8 @@ static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); kfree(rx_sa); rx_sc->rx_sa[assoc_num] = NULL; @@ -1154,7 +1112,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, if (!rx_sa || !rx_sa->macsec_rule) continue; - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); } } @@ -1165,7 +1124,8 @@ static int 
macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, + &rx_sc->sc_xarray_element->fs_id); if (err) goto out; } @@ -1218,7 +1178,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) if (!tx_sa) continue; - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); } for (i = 0; i < MACSEC_NUM_AN; ++i) { @@ -1227,7 +1187,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) continue; if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true); + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } @@ -1265,7 +1225,7 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) if (!tx_sa) continue; - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); kfree(tx_sa); macsec_device->tx_sa[i] = NULL; @@ -1273,7 +1233,7 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) list = &macsec_device->macsec_rx_sc_list_head; list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) - macsec_del_rxsc_ctx(macsec, rx_sc); + macsec_del_rxsc_ctx(macsec, rx_sc, ctx->secy->netdev); kfree(macsec_device->dev_addr); macsec_device->dev_addr = NULL; @@ -1647,50 +1607,6 @@ static void mlx5e_macsec_aso_cleanup(struct mlx5e_macsec_aso *aso, struct mlx5_c mlx5_core_dealloc_pd(mdev, aso->pdn); } -bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) -{ - if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD)) - return false; - - if (!MLX5_CAP_GEN(mdev, log_max_dek)) - return false; - - if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload)) - return false; - - if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) || - !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec)) - return false; - - if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) || - !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec)) - return false; - - if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) && - !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt)) - return false; - - if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) && - !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt)) - return false; - - return true; -} - -void mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats) -{ - mlx5e_macsec_fs_get_stats_fill(macsec->macsec_fs, macsec_stats); -} - -struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec) -{ - if (!macsec) - return NULL; - - return &macsec->stats; -} - static const struct macsec_ops macsec_offload_ops = { .mdo_add_txsa = mlx5e_macsec_add_txsa, .mdo_upd_txsa = mlx5e_macsec_upd_txsa, @@ -1711,7 +1627,8 @@ bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb struct metadata_dst *md_dst = skb_metadata_dst(skb); u32 fs_id; - fs_id = mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci); + fs_id = mlx5_macsec_fs_get_fs_id_from_hashtable(macsec->mdev->macsec_fs, + &md_dst->u.macsec_info.sci); if (!fs_id) goto err_out; @@ -1729,7 +1646,8 @@ void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, struct metadata_dst *md_dst = skb_metadata_dst(skb); u32 fs_id; - fs_id = 
mlx5e_macsec_get_sa_from_hashtable(&macsec->sci_hash, &md_dst->u.macsec_info.sci); + fs_id = mlx5_macsec_fs_get_fs_id_from_hashtable(macsec->mdev->macsec_fs, + &md_dst->u.macsec_info.sci); if (!fs_id) return; @@ -1782,7 +1700,7 @@ int mlx5e_macsec_init(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_macsec *macsec = NULL; - struct mlx5e_macsec_fs *macsec_fs; + struct mlx5_macsec_fs *macsec_fs; int err; if (!mlx5e_is_macsec_device(priv->mdev)) { @@ -1797,13 +1715,6 @@ int mlx5e_macsec_init(struct mlx5e_priv *priv) INIT_LIST_HEAD(&macsec->macsec_device_list_head); mutex_init(&macsec->lock); - err = rhashtable_init(&macsec->sci_hash, &rhash_sci); - if (err) { - mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n", - err); - goto err_hash; - } - err = mlx5e_macsec_aso_init(&macsec->aso, priv->mdev); if (err) { mlx5_core_err(mdev, "MACsec offload: Failed to init aso, err=%d\n", err); @@ -1822,13 +1733,13 @@ int mlx5e_macsec_init(struct mlx5e_priv *priv) macsec->mdev = mdev; - macsec_fs = mlx5e_macsec_fs_init(mdev, priv->netdev); + macsec_fs = mlx5_macsec_fs_init(mdev); if (!macsec_fs) { err = -ENOMEM; goto err_out; } - macsec->macsec_fs = macsec_fs; + mdev->macsec_fs = macsec_fs; macsec->nb.notifier_call = macsec_obj_change_event; mlx5_notifier_register(mdev, &macsec->nb); @@ -1842,8 +1753,6 @@ err_out: err_wq: mlx5e_macsec_aso_cleanup(&macsec->aso, priv->mdev); err_aso: - rhashtable_destroy(&macsec->sci_hash); -err_hash: kfree(macsec); priv->macsec = NULL; return err; @@ -1858,10 +1767,9 @@ void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) return; mlx5_notifier_unregister(mdev, &macsec->nb); - mlx5e_macsec_fs_cleanup(macsec->macsec_fs); + mlx5_macsec_fs_cleanup(mdev->macsec_fs); destroy_workqueue(macsec->wq); mlx5e_macsec_aso_cleanup(&macsec->aso, mdev); - rhashtable_destroy(&macsec->sci_hash); mutex_destroy(&macsec->lock); kfree(macsec); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h index 347380a2cd9c..27df72e23106 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -4,32 +4,16 @@ #ifndef __MLX5_EN_ACCEL_MACSEC_H__ #define __MLX5_EN_ACCEL_MACSEC_H__ -#ifdef CONFIG_MLX5_EN_MACSEC +#ifdef CONFIG_MLX5_MACSEC #include <linux/mlx5/driver.h> #include <net/macsec.h> #include <net/dst_metadata.h> - -/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ -#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ -#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX -#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) -#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) +#include "lib/macsec_fs.h" struct mlx5e_priv; struct mlx5e_macsec; -struct mlx5e_macsec_stats { - u64 macsec_rx_pkts; - u64 macsec_rx_bytes; - u64 macsec_rx_pkts_drop; - u64 macsec_rx_bytes_drop; - u64 macsec_tx_pkts; - u64 macsec_tx_bytes; - u64 macsec_tx_pkts_drop; - u64 macsec_tx_bytes_drop; -}; - void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv); int mlx5e_macsec_init(struct mlx5e_priv *priv); void mlx5e_macsec_cleanup(struct mlx5e_priv *priv); @@ -52,9 +36,6 @@ static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, struct mlx5_cqe64 *cqe); -bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev); -void 
mlx5e_macsec_get_stats_fill(struct mlx5e_macsec *macsec, void *macsec_stats); -struct mlx5e_macsec_stats *mlx5e_macsec_get_stats(struct mlx5e_macsec *macsec); #else @@ -67,7 +48,6 @@ static inline void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, struct mlx5_cqe64 *cqe) {} -static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) { return false; } -#endif /* CONFIG_MLX5_EN_MACSEC */ +#endif /* CONFIG_MLX5_MACSEC */ #endif /* __MLX5_ACCEL_EN_MACSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c deleted file mode 100644 index 7fc901a6ec5f..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ /dev/null @@ -1,1393 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ - -#include <net/macsec.h> -#include <linux/netdevice.h> -#include <linux/mlx5/qp.h> -#include <linux/if_vlan.h> -#include "fs_core.h" -#include "en/fs.h" -#include "en_accel/macsec_fs.h" -#include "mlx5_core.h" - -/* MACsec TX flow steering */ -#define CRYPTO_NUM_MAXSEC_FTE BIT(15) -#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1 - -#define TX_CRYPTO_TABLE_LEVEL 0 -#define TX_CRYPTO_TABLE_NUM_GROUPS 3 -#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1 -#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \ - (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \ - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE)) -#define TX_CHECK_TABLE_LEVEL 1 -#define TX_CHECK_TABLE_NUM_FTE 2 -#define RX_CRYPTO_TABLE_LEVEL 0 -#define RX_CHECK_TABLE_LEVEL 1 -#define RX_CHECK_TABLE_NUM_FTE 3 -#define RX_CRYPTO_TABLE_NUM_GROUPS 3 -#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \ - ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2) -#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \ - (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE) -#define RX_NUM_OF_RULES_PER_SA 2 - -#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */ -#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23 -#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8 -#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5 -#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) -#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8 -#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN) - -/* MACsec RX flow steering */ -#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E - -struct mlx5_sectag_header { - __be16 ethertype; - u8 tci_an; - u8 sl; - u32 pn; - u8 sci[MACSEC_SCI_LEN]; /* optional */ -} __packed; - -struct mlx5e_macsec_tx_rule { - struct mlx5_flow_handle *rule; - struct mlx5_pkt_reformat *pkt_reformat; - u32 fs_id; -}; - -struct mlx5e_macsec_tables { - struct mlx5e_flow_table ft_crypto; - struct mlx5_flow_handle *crypto_miss_rule; - - struct mlx5_flow_table *ft_check; - struct mlx5_flow_group *ft_check_group; - struct mlx5_fc *check_miss_rule_counter; - struct mlx5_flow_handle *check_miss_rule; - struct mlx5_fc *check_rule_counter; - - u32 refcnt; -}; - -struct mlx5e_macsec_tx { - struct mlx5_flow_handle *crypto_mke_rule; - struct mlx5_flow_handle *check_rule; - - struct ida tx_halloc; - - struct mlx5e_macsec_tables tables; -}; - -struct mlx5e_macsec_rx_rule { - struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA]; - struct mlx5_modify_hdr *meta_modhdr; -}; - -struct mlx5e_macsec_rx { - struct 
mlx5_flow_handle *check_rule[2]; - struct mlx5_pkt_reformat *check_rule_pkt_reformat[2]; - - struct mlx5e_macsec_tables tables; -}; - -union mlx5e_macsec_rule { - struct mlx5e_macsec_tx_rule tx_rule; - struct mlx5e_macsec_rx_rule rx_rule; -}; - -struct mlx5e_macsec_fs { - struct mlx5_core_dev *mdev; - struct net_device *netdev; - struct mlx5e_macsec_tx *tx_fs; - struct mlx5e_macsec_rx *rx_fs; -}; - -static void macsec_fs_tx_destroy(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; - struct mlx5e_macsec_tables *tx_tables; - - tx_tables = &tx_fs->tables; - - /* Tx check table */ - if (tx_fs->check_rule) { - mlx5_del_flow_rules(tx_fs->check_rule); - tx_fs->check_rule = NULL; - } - - if (tx_tables->check_miss_rule) { - mlx5_del_flow_rules(tx_tables->check_miss_rule); - tx_tables->check_miss_rule = NULL; - } - - if (tx_tables->ft_check_group) { - mlx5_destroy_flow_group(tx_tables->ft_check_group); - tx_tables->ft_check_group = NULL; - } - - if (tx_tables->ft_check) { - mlx5_destroy_flow_table(tx_tables->ft_check); - tx_tables->ft_check = NULL; - } - - /* Tx crypto table */ - if (tx_fs->crypto_mke_rule) { - mlx5_del_flow_rules(tx_fs->crypto_mke_rule); - tx_fs->crypto_mke_rule = NULL; - } - - if (tx_tables->crypto_miss_rule) { - mlx5_del_flow_rules(tx_tables->crypto_miss_rule); - tx_tables->crypto_miss_rule = NULL; - } - - mlx5e_destroy_flow_table(&tx_tables->ft_crypto); -} - -static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int mclen = MLX5_ST_SZ_BYTES(fte_match_param); - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - in = kvzalloc(inlen, GFP_KERNEL); - - if (!in) { - kfree(ft->g); - return -ENOMEM; - } - - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - - /* Flow Group for MKE match */ - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - - MLX5_SET_CFG(in, start_flow_index, ix); - ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Flow Group for SA rules */ - memset(in, 0, inlen); - memset(mc, 0, mclen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a, - MLX5_ETH_WQE_FT_META_MACSEC_MASK); - - MLX5_SET_CFG(in, start_flow_index, ix); - ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Flow Group for l2 traps */ - memset(in, 0, inlen); - memset(mc, 0, mclen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; -} - -static struct mlx5_flow_table - *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags, - int level, int max_fte) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct 
mlx5_flow_table *fdb = NULL; - - /* reserve entry for the match all miss group and rule */ - ft_attr.autogroup.num_reserved_entries = 1; - ft_attr.autogroup.max_num_groups = 1; - ft_attr.prio = 0; - ft_attr.flags = flags; - ft_attr.level = level; - ft_attr.max_fte = max_fte; - - fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - - return fdb; -} - -static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; - struct net_device *netdev = macsec_fs->netdev; - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_destination dest = {}; - struct mlx5e_macsec_tables *tx_tables; - struct mlx5_flow_act flow_act = {}; - struct mlx5e_flow_table *ft_crypto; - struct mlx5_flow_table *flow_table; - struct mlx5_flow_group *flow_group; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - u32 *flow_group_in; - int err; - - ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); - if (!ns) - return -ENOMEM; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) { - err = -ENOMEM; - goto out_spec; - } - - tx_tables = &tx_fs->tables; - ft_crypto = &tx_tables->ft_crypto; - - /* Tx crypto table */ - ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft_attr.level = TX_CRYPTO_TABLE_LEVEL; - ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; - - flow_table = mlx5_create_flow_table(ns, &ft_attr); - if (IS_ERR(flow_table)) { - err = PTR_ERR(flow_table); - netdev_err(netdev, "Failed to create MACsec Tx crypto table err(%d)\n", err); - goto out_flow_group; - } - ft_crypto->t = flow_table; - - /* Tx crypto table groups */ - err = macsec_fs_tx_create_crypto_table_groups(ft_crypto); - if (err) { - netdev_err(netdev, - "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", - err); - goto err; - } - - /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */ - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE); - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; - - rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, "Failed to add MACsec TX MKE rule, err=%d\n", err); - goto err; - } - tx_fs->crypto_mke_rule = rule; - - /* Tx crypto table Default miss rule */ - memset(&flow_act, 0, sizeof(flow_act)); - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; - rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, "Failed to add MACsec Tx table default miss rule %d\n", err); - goto err; - } - tx_tables->crypto_miss_rule = rule; - - /* Tx check table */ - flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL, - TX_CHECK_TABLE_NUM_FTE); - if (IS_ERR(flow_table)) { - err = PTR_ERR(flow_table); - netdev_err(netdev, "fail to create MACsec TX check table, err(%d)\n", err); - goto err; - } - tx_tables->ft_check = flow_table; - - /* Tx check table Default miss group/rule */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 
flow_table->max_fte - 1); - flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in); - if (IS_ERR(flow_group)) { - err = PTR_ERR(flow_group); - netdev_err(netdev, - "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", - err); - goto err; - } - tx_tables->ft_check_group = flow_group; - - /* Tx check table default drop rule */ - memset(&dest, 0, sizeof(struct mlx5_flow_destination)); - memset(&flow_act, 0, sizeof(flow_act)); - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter); - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, "Failed to added MACsec tx check drop rule, err(%d)\n", err); - goto err; - } - tx_tables->check_miss_rule = rule; - - /* Tx check table rule */ - memset(spec, 0, sizeof(struct mlx5_flow_spec)); - memset(&dest, 0, sizeof(struct mlx5_flow_destination)); - memset(&flow_act, 0, sizeof(flow_act)); - - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - - flow_act.flags = FLOW_ACT_NO_APPEND; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter); - rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, "Failed to add MACsec check rule, err=%d\n", err); - goto err; - } - tx_fs->check_rule = rule; - - goto out_flow_group; - -err: - macsec_fs_tx_destroy(macsec_fs); -out_flow_group: - kvfree(flow_group_in); -out_spec: - kvfree(spec); - return err; -} - -static int macsec_fs_tx_ft_get(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; - struct mlx5e_macsec_tables *tx_tables; - int err = 0; - - tx_tables = &tx_fs->tables; - if (tx_tables->refcnt) - goto out; - - err = macsec_fs_tx_create(macsec_fs); - if (err) - return err; - -out: - tx_tables->refcnt++; - return err; -} - -static void macsec_fs_tx_ft_put(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; - - if (--tx_tables->refcnt) - return; - - macsec_fs_tx_destroy(macsec_fs); -} - -static int macsec_fs_tx_setup_fte(struct mlx5e_macsec_fs *macsec_fs, - struct mlx5_flow_spec *spec, - struct mlx5_flow_act *flow_act, - u32 macsec_obj_id, - u32 *fs_id) -{ - struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; - int err = 0; - u32 id; - - err = ida_alloc_range(&tx_fs->tx_halloc, 1, - MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES, - GFP_KERNEL); - if (err < 0) - return err; - - id = err; - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; - - /* Metadata match */ - MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, - MLX5_ETH_WQE_FT_META_MACSEC_MASK); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, - MLX5_ETH_WQE_FT_META_MACSEC | id << 2); - - *fs_id = id; - flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; - flow_act->crypto.obj_id = macsec_obj_id; - - mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id); - return 0; -} - 
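/* Editorial sketch, not part of the diff: calling convention of the
 * SecTAG builder defined just below (deleted here and relocated to
 * lib/macsec_fs.c by this series). The buffer covers the worst case of
 * an 8-byte SecTAG plus an 8-byte SCI; "ctx" stands in for a real
 * struct macsec_context pointer.
 */
char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN];	/* 8 + 8 bytes */
size_t reformat_size;

macsec_fs_tx_create_sectag_header(ctx, reformatbf, &reformat_size);
/* reformat_size is 8 without the SCI, 16 when macsec_send_sci(secy)
 * returns true, matching the sizing logic inside the helper.
 */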
-static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx, - char *reformatbf, - size_t *reformat_size) -{ - const struct macsec_secy *secy = ctx->secy; - bool sci_present = macsec_send_sci(secy); - struct mlx5_sectag_header sectag = {}; - const struct macsec_tx_sc *tx_sc; - - tx_sc = &secy->tx_sc; - sectag.ethertype = htons(ETH_P_MACSEC); - - if (sci_present) { - sectag.tci_an |= MACSEC_TCI_SC; - memcpy(&sectag.sci, &secy->sci, - sizeof(sectag.sci)); - } else { - if (tx_sc->end_station) - sectag.tci_an |= MACSEC_TCI_ES; - if (tx_sc->scb) - sectag.tci_an |= MACSEC_TCI_SCB; - } - - /* With GCM, C/E clear for !encrypt, both set for encrypt */ - if (tx_sc->encrypt) - sectag.tci_an |= MACSEC_TCI_CONFID; - else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) - sectag.tci_an |= MACSEC_TCI_C; - - sectag.tci_an |= tx_sc->encoding_sa; - - *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0); - - memcpy(reformatbf, &sectag, *reformat_size); -} - -static void macsec_fs_tx_del_rule(struct mlx5e_macsec_fs *macsec_fs, - struct mlx5e_macsec_tx_rule *tx_rule) -{ - if (tx_rule->rule) { - mlx5_del_flow_rules(tx_rule->rule); - tx_rule->rule = NULL; - } - - if (tx_rule->pkt_reformat) { - mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat); - tx_rule->pkt_reformat = NULL; - } - - if (tx_rule->fs_id) { - ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id); - tx_rule->fs_id = 0; - } - - kfree(tx_rule); - - macsec_fs_tx_ft_put(macsec_fs); -} - -#define MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES 1 - -static union mlx5e_macsec_rule * -macsec_fs_tx_add_rule(struct mlx5e_macsec_fs *macsec_fs, - const struct macsec_context *macsec_ctx, - struct mlx5_macsec_rule_attrs *attrs, - u32 *sa_fs_id) -{ - char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN]; - struct mlx5_pkt_reformat_params reformat_params = {}; - struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; - struct net_device *netdev = macsec_fs->netdev; - union mlx5e_macsec_rule *macsec_rule = NULL; - struct mlx5_flow_destination dest = {}; - struct mlx5e_macsec_tables *tx_tables; - struct mlx5e_macsec_tx_rule *tx_rule; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - size_t reformat_size; - int err = 0; - u32 fs_id; - - tx_tables = &tx_fs->tables; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return NULL; - - err = macsec_fs_tx_ft_get(macsec_fs); - if (err) - goto out_spec; - - macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); - if (!macsec_rule) { - macsec_fs_tx_ft_put(macsec_fs); - goto out_spec; - } - - tx_rule = &macsec_rule->tx_rule; - - /* Tx crypto table crypto rule */ - macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size); - - reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC; - reformat_params.size = reformat_size; - reformat_params.data = reformatbf; - - if (is_vlan_dev(macsec_ctx->netdev)) - reformat_params.param_0 = MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES; - - flow_act.pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev, - &reformat_params, - MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); - if (IS_ERR(flow_act.pkt_reformat)) { - err = PTR_ERR(flow_act.pkt_reformat); - netdev_err(netdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err); - goto err; - } - tx_rule->pkt_reformat = flow_act.pkt_reformat; - - err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, &fs_id); - if (err) { - netdev_err(netdev, - "Failed to add packet reformat for MACsec TX crypto rule, 
err=%d\n", - err); - goto err; - } - - tx_rule->fs_id = fs_id; - *sa_fs_id = fs_id; - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = tx_tables->ft_check; - rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, "Failed to add MACsec TX crypto rule, err=%d\n", err); - goto err; - } - tx_rule->rule = rule; - - goto out_spec; - -err: - macsec_fs_tx_del_rule(macsec_fs, tx_rule); - macsec_rule = NULL; -out_spec: - kvfree(spec); - - return macsec_rule; -} - -static void macsec_fs_tx_cleanup(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_tx *tx_fs = macsec_fs->tx_fs; - struct mlx5_core_dev *mdev = macsec_fs->mdev; - struct mlx5e_macsec_tables *tx_tables; - - if (!tx_fs) - return; - - tx_tables = &tx_fs->tables; - if (tx_tables->refcnt) { - netdev_err(macsec_fs->netdev, - "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n", - tx_tables->refcnt); - return; - } - - ida_destroy(&tx_fs->tx_halloc); - - if (tx_tables->check_miss_rule_counter) { - mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter); - tx_tables->check_miss_rule_counter = NULL; - } - - if (tx_tables->check_rule_counter) { - mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); - tx_tables->check_rule_counter = NULL; - } - - kfree(tx_fs); - macsec_fs->tx_fs = NULL; -} - -static int macsec_fs_tx_init(struct mlx5e_macsec_fs *macsec_fs) -{ - struct net_device *netdev = macsec_fs->netdev; - struct mlx5_core_dev *mdev = macsec_fs->mdev; - struct mlx5e_macsec_tables *tx_tables; - struct mlx5e_macsec_tx *tx_fs; - struct mlx5_fc *flow_counter; - int err; - - tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL); - if (!tx_fs) - return -ENOMEM; - - tx_tables = &tx_fs->tables; - - flow_counter = mlx5_fc_create(mdev, false); - if (IS_ERR(flow_counter)) { - err = PTR_ERR(flow_counter); - netdev_err(netdev, - "Failed to create MACsec Tx encrypt flow counter, err(%d)\n", - err); - goto err_encrypt_counter; - } - tx_tables->check_rule_counter = flow_counter; - - flow_counter = mlx5_fc_create(mdev, false); - if (IS_ERR(flow_counter)) { - err = PTR_ERR(flow_counter); - netdev_err(netdev, - "Failed to create MACsec Tx drop flow counter, err(%d)\n", - err); - goto err_drop_counter; - } - tx_tables->check_miss_rule_counter = flow_counter; - - ida_init(&tx_fs->tx_halloc); - - macsec_fs->tx_fs = tx_fs; - - return 0; - -err_drop_counter: - mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); - tx_tables->check_rule_counter = NULL; - -err_encrypt_counter: - kfree(tx_fs); - macsec_fs->tx_fs = NULL; - - return err; -} - -static void macsec_fs_rx_destroy(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; - struct mlx5e_macsec_tables *rx_tables; - int i; - - /* Rx check table */ - for (i = 1; i >= 0; --i) { - if (rx_fs->check_rule[i]) { - mlx5_del_flow_rules(rx_fs->check_rule[i]); - rx_fs->check_rule[i] = NULL; - } - - if (rx_fs->check_rule_pkt_reformat[i]) { - mlx5_packet_reformat_dealloc(macsec_fs->mdev, - rx_fs->check_rule_pkt_reformat[i]); - rx_fs->check_rule_pkt_reformat[i] = NULL; - } - } - - rx_tables = &rx_fs->tables; - - if (rx_tables->check_miss_rule) { - mlx5_del_flow_rules(rx_tables->check_miss_rule); - rx_tables->check_miss_rule = NULL; - } - - if (rx_tables->ft_check_group) { - mlx5_destroy_flow_group(rx_tables->ft_check_group); - 
rx_tables->ft_check_group = NULL; - } - - if (rx_tables->ft_check) { - mlx5_destroy_flow_table(rx_tables->ft_check); - rx_tables->ft_check = NULL; - } - - /* Rx crypto table */ - if (rx_tables->crypto_miss_rule) { - mlx5_del_flow_rules(rx_tables->crypto_miss_rule); - rx_tables->crypto_miss_rule = NULL; - } - - mlx5e_destroy_flow_table(&rx_tables->ft_crypto); -} - -static int macsec_fs_rx_create_crypto_table_groups(struct mlx5e_flow_table *ft) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int mclen = MLX5_ST_SZ_BYTES(fte_match_param); - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - kfree(ft->g); - return -ENOMEM; - } - - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - - /* Flow group for SA rule with SCI */ - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS_5); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - - MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, - MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << - MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); - MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2); - MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3); - - MLX5_SET_CFG(in, start_flow_index, ix); - ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Flow group for SA rule without SCI */ - memset(in, 0, inlen); - memset(mc, 0, mclen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS_5); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - - MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, - MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); - - MLX5_SET_CFG(in, start_flow_index, ix); - ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Flow Group for l2 traps */ - memset(in, 0, inlen); - memset(mc, 0, mclen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; -} - -static int macsec_fs_rx_create_check_decap_rule(struct mlx5e_macsec_fs *macsec_fs, - struct mlx5_flow_destination *dest, - struct mlx5_flow_act *flow_act, - struct mlx5_flow_spec *spec, - int reformat_param_size) -{ - int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 
0 : 1; - u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI]; - struct mlx5_pkt_reformat_params reformat_params = {}; - struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; - struct net_device *netdev = macsec_fs->netdev; - struct mlx5e_macsec_tables *rx_tables; - struct mlx5_flow_handle *rule; - int err = 0; - - rx_tables = &rx_fs->tables; - - /* Rx check table decap 16B rule */ - memset(dest, 0, sizeof(*dest)); - memset(flow_act, 0, sizeof(*flow_act)); - memset(spec, 0, sizeof(*spec)); - - reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC; - reformat_params.size = reformat_param_size; - reformat_params.data = mlx5_reformat_buf; - flow_act->pkt_reformat = mlx5_packet_reformat_alloc(macsec_fs->mdev, - &reformat_params, - MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); - if (IS_ERR(flow_act->pkt_reformat)) { - err = PTR_ERR(flow_act->pkt_reformat); - netdev_err(netdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err); - return err; - } - rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat; - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - /* MACsec syndrome match */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0); - /* ASO return reg syndrome match */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); - - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; - /* Sectag TCI SC present bit*/ - MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, - MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); - - if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, - MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << - MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); - - flow_act->flags = FLOW_ACT_NO_APPEND; - flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest->type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest->counter_id = mlx5_fc_id(rx_tables->check_rule_counter); - rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, "Failed to add MACsec Rx check rule, err=%d\n", err); - return err; - } - - rx_fs->check_rule[rule_index] = rule; - - return 0; -} - -static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; - struct net_device *netdev = macsec_fs->netdev; - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_destination dest = {}; - struct mlx5e_macsec_tables *rx_tables; - struct mlx5e_flow_table *ft_crypto; - struct mlx5_flow_table *flow_table; - struct mlx5_flow_group *flow_group; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - u32 *flow_group_in; - int err; - - ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); - if (!ns) - return -ENOMEM; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) { - err = -ENOMEM; - goto free_spec; - } - - rx_tables = 
&rx_fs->tables; - ft_crypto = &rx_tables->ft_crypto; - - /* Rx crypto table */ - ft_attr.level = RX_CRYPTO_TABLE_LEVEL; - ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; - - flow_table = mlx5_create_flow_table(ns, &ft_attr); - if (IS_ERR(flow_table)) { - err = PTR_ERR(flow_table); - netdev_err(netdev, "Failed to create MACsec Rx crypto table err(%d)\n", err); - goto out_flow_group; - } - ft_crypto->t = flow_table; - - /* Rx crypto table groups */ - err = macsec_fs_rx_create_crypto_table_groups(ft_crypto); - if (err) { - netdev_err(netdev, - "Failed to create default flow group for MACsec Rx crypto table err(%d)\n", - err); - goto err; - } - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, - "Failed to add MACsec Rx crypto table default miss rule %d\n", - err); - goto err; - } - rx_tables->crypto_miss_rule = rule; - - /* Rx check table */ - flow_table = macsec_fs_auto_group_table_create(ns, - MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT, - RX_CHECK_TABLE_LEVEL, - RX_CHECK_TABLE_NUM_FTE); - if (IS_ERR(flow_table)) { - err = PTR_ERR(flow_table); - netdev_err(netdev, "Failed to create MACsec RX check table, err(%d)\n", err); - goto err; - } - rx_tables->ft_check = flow_table; - - /* Rx check table Default miss group/rule */ - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); - flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in); - if (IS_ERR(flow_group)) { - err = PTR_ERR(flow_group); - netdev_err(netdev, - "Failed to create default flow group for MACsec Rx check table err(%d)\n", - err); - goto err; - } - rx_tables->ft_check_group = flow_group; - - /* Rx check table default drop rule */ - memset(&flow_act, 0, sizeof(flow_act)); - - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter); - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, "Failed to add MACsec Rx check drop rule, err(%d)\n", err); - goto err; - } - rx_tables->check_miss_rule = rule; - - /* Rx check table decap rules */ - err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, - MLX5_SECTAG_HEADER_SIZE_WITH_SCI); - if (err) - goto err; - - err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, - MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI); - if (err) - goto err; - - goto out_flow_group; - -err: - macsec_fs_rx_destroy(macsec_fs); -out_flow_group: - kvfree(flow_group_in); -free_spec: - kvfree(spec); - return err; -} - -static int macsec_fs_rx_ft_get(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; - int err = 0; - - if (rx_tables->refcnt) - goto out; - - err = macsec_fs_rx_create(macsec_fs); - if (err) - return err; - -out: - rx_tables->refcnt++; - return err; -} - -static void macsec_fs_rx_ft_put(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; - - if (--rx_tables->refcnt) - return; - - macsec_fs_rx_destroy(macsec_fs); -} - -static void macsec_fs_rx_del_rule(struct mlx5e_macsec_fs *macsec_fs, - struct mlx5e_macsec_rx_rule *rx_rule) -{ - int i; - - for (i = 0; i < 
RX_NUM_OF_RULES_PER_SA; ++i) { - if (rx_rule->rule[i]) { - mlx5_del_flow_rules(rx_rule->rule[i]); - rx_rule->rule[i] = NULL; - } - } - - if (rx_rule->meta_modhdr) { - mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr); - rx_rule->meta_modhdr = NULL; - } - - kfree(rx_rule); - - macsec_fs_rx_ft_put(macsec_fs); -} - -static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec, - struct mlx5_flow_act *flow_act, - struct mlx5_macsec_rule_attrs *attrs, - bool sci_present) -{ - u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num; - struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto; - __be32 *sci_p = (__be32 *)(&attrs->sci); - - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - - /* MACsec ethertype */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC); - - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; - - /* Sectag AN + TCI SC present bit*/ - MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, - MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, - tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); - - if (sci_present) { - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - misc_parameters_5.macsec_tag_2); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2, - be32_to_cpu(sci_p[0])); - - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - misc_parameters_5.macsec_tag_3); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3, - be32_to_cpu(sci_p[1])); - } else { - /* When SCI isn't present in the Sectag, need to match the source */ - /* MAC address only if the SCI contains the default MACsec PORT */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); - memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16), - sci_p, ETH_ALEN); - } - - crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; - crypto_params->obj_id = attrs->macsec_obj_id; -} - -static union mlx5e_macsec_rule * -macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, - struct mlx5_macsec_rule_attrs *attrs, - u32 fs_id) -{ - u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; - struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; - struct net_device *netdev = macsec_fs->netdev; - union mlx5e_macsec_rule *macsec_rule = NULL; - struct mlx5_modify_hdr *modify_hdr = NULL; - struct mlx5_flow_destination dest = {}; - struct mlx5e_macsec_tables *rx_tables; - struct mlx5e_macsec_rx_rule *rx_rule; - struct mlx5_flow_act flow_act = {}; - struct mlx5e_flow_table *ft_crypto; - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err = 0; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return NULL; - - err = macsec_fs_rx_ft_get(macsec_fs); - if (err) - goto out_spec; - - macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); - if (!macsec_rule) { - macsec_fs_rx_ft_put(macsec_fs); - goto out_spec; - } - - rx_rule = &macsec_rule->rx_rule; - rx_tables = &rx_fs->tables; - ft_crypto = &rx_tables->ft_crypto; - - /* Set bit[31 - 30] macsec marker - 0x01 */ - /* Set bit[15-0] fs id */ - MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); 
- MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30)); - MLX5_SET(set_action_in, action, offset, 0); - MLX5_SET(set_action_in, action, length, 32); - - modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, - 1, action); - if (IS_ERR(modify_hdr)) { - err = PTR_ERR(modify_hdr); - netdev_err(netdev, "Failed to alloc MACsec set modify_header_id err=%d\n", err); - modify_hdr = NULL; - goto err; - } - rx_rule->meta_modhdr = modify_hdr; - - /* Rx crypto table with SCI rule */ - macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true); - - flow_act.modify_hdr = modify_hdr; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | - MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = rx_tables->ft_check; - rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, - "Failed to add SA with SCI rule to Rx crypto table, err=%d\n", - err); - goto err; - } - rx_rule->rule[0] = rule; - - /* Rx crypto table without SCI rule */ - if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) { - memset(spec, 0, sizeof(struct mlx5_flow_spec)); - memset(&dest, 0, sizeof(struct mlx5_flow_destination)); - memset(&flow_act, 0, sizeof(flow_act)); - - macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false); - - flow_act.modify_hdr = modify_hdr; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | - MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = rx_tables->ft_check; - rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(netdev, - "Failed to add SA without SCI rule to Rx crypto table, err=%d\n", - err); - goto err; - } - rx_rule->rule[1] = rule; - } - - kvfree(spec); - return macsec_rule; - -err: - macsec_fs_rx_del_rule(macsec_fs, rx_rule); - macsec_rule = NULL; -out_spec: - kvfree(spec); - return macsec_rule; -} - -static int macsec_fs_rx_init(struct mlx5e_macsec_fs *macsec_fs) -{ - struct net_device *netdev = macsec_fs->netdev; - struct mlx5_core_dev *mdev = macsec_fs->mdev; - struct mlx5e_macsec_tables *rx_tables; - struct mlx5e_macsec_rx *rx_fs; - struct mlx5_fc *flow_counter; - int err; - - rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL); - if (!rx_fs) - return -ENOMEM; - - flow_counter = mlx5_fc_create(mdev, false); - if (IS_ERR(flow_counter)) { - err = PTR_ERR(flow_counter); - netdev_err(netdev, - "Failed to create MACsec Rx encrypt flow counter, err(%d)\n", - err); - goto err_encrypt_counter; - } - - rx_tables = &rx_fs->tables; - rx_tables->check_rule_counter = flow_counter; - - flow_counter = mlx5_fc_create(mdev, false); - if (IS_ERR(flow_counter)) { - err = PTR_ERR(flow_counter); - netdev_err(netdev, - "Failed to create MACsec Rx drop flow counter, err(%d)\n", - err); - goto err_drop_counter; - } - rx_tables->check_miss_rule_counter = flow_counter; - - macsec_fs->rx_fs = rx_fs; - - return 0; - -err_drop_counter: - mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); - rx_tables->check_rule_counter = NULL; - -err_encrypt_counter: - kfree(rx_fs); - macsec_fs->rx_fs = NULL; - - return err; -} - -static void macsec_fs_rx_cleanup(struct mlx5e_macsec_fs *macsec_fs) -{ - struct mlx5e_macsec_rx *rx_fs = macsec_fs->rx_fs; - 
struct mlx5_core_dev *mdev = macsec_fs->mdev; - struct mlx5e_macsec_tables *rx_tables; - - if (!rx_fs) - return; - - rx_tables = &rx_fs->tables; - - if (rx_tables->refcnt) { - netdev_err(macsec_fs->netdev, - "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n", - rx_tables->refcnt); - return; - } - - if (rx_tables->check_miss_rule_counter) { - mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter); - rx_tables->check_miss_rule_counter = NULL; - } - - if (rx_tables->check_rule_counter) { - mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); - rx_tables->check_rule_counter = NULL; - } - - kfree(rx_fs); - macsec_fs->rx_fs = NULL; -} - -void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats) -{ - struct mlx5e_macsec_stats *stats = (struct mlx5e_macsec_stats *)macsec_stats; - struct mlx5e_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; - struct mlx5e_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; - struct mlx5_core_dev *mdev = macsec_fs->mdev; - - if (tx_tables->check_rule_counter) - mlx5_fc_query(mdev, tx_tables->check_rule_counter, - &stats->macsec_tx_pkts, &stats->macsec_tx_bytes); - - if (tx_tables->check_miss_rule_counter) - mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter, - &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop); - - if (rx_tables->check_rule_counter) - mlx5_fc_query(mdev, rx_tables->check_rule_counter, - &stats->macsec_rx_pkts, &stats->macsec_rx_bytes); - - if (rx_tables->check_miss_rule_counter) - mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter, - &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop); -} - -union mlx5e_macsec_rule * -mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs, - const struct macsec_context *macsec_ctx, - struct mlx5_macsec_rule_attrs *attrs, - u32 *sa_fs_id) -{ - return (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? - macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, sa_fs_id) : - macsec_fs_rx_add_rule(macsec_fs, attrs, *sa_fs_id); -} - -void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs, - union mlx5e_macsec_rule *macsec_rule, - int action) -{ - (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? 
- macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule) : - macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule); -} - -void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs) -{ - macsec_fs_rx_cleanup(macsec_fs); - macsec_fs_tx_cleanup(macsec_fs); - kfree(macsec_fs); -} - -struct mlx5e_macsec_fs * -mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, - struct net_device *netdev) -{ - struct mlx5e_macsec_fs *macsec_fs; - int err; - - macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL); - if (!macsec_fs) - return NULL; - - macsec_fs->mdev = mdev; - macsec_fs->netdev = netdev; - - err = macsec_fs_tx_init(macsec_fs); - if (err) { - netdev_err(netdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); - goto err; - } - - err = macsec_fs_rx_init(macsec_fs); - if (err) { - netdev_err(netdev, "MACsec offload: Failed to init rx_fs, err=%d\n", err); - goto tx_cleanup; - } - - return macsec_fs; - -tx_cleanup: - macsec_fs_tx_cleanup(macsec_fs); -err: - kfree(macsec_fs); - return NULL; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h deleted file mode 100644 index b429648d4ee7..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ - -#ifndef __MLX5_MACSEC_STEERING_H__ -#define __MLX5_MACSEC_STEERING_H__ - -#ifdef CONFIG_MLX5_EN_MACSEC - -#include "en_accel/macsec.h" - -#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16 - -struct mlx5e_macsec_fs; -union mlx5e_macsec_rule; - -struct mlx5_macsec_rule_attrs { - sci_t sci; - u32 macsec_obj_id; - u8 assoc_num; - int action; -}; - -enum mlx5_macsec_action { - MLX5_ACCEL_MACSEC_ACTION_ENCRYPT, - MLX5_ACCEL_MACSEC_ACTION_DECRYPT, -}; - -void mlx5e_macsec_fs_cleanup(struct mlx5e_macsec_fs *macsec_fs); - -struct mlx5e_macsec_fs * -mlx5e_macsec_fs_init(struct mlx5_core_dev *mdev, struct net_device *netdev); - -union mlx5e_macsec_rule * -mlx5e_macsec_fs_add_rule(struct mlx5e_macsec_fs *macsec_fs, - const struct macsec_context *ctx, - struct mlx5_macsec_rule_attrs *attrs, - u32 *sa_fs_id); - -void mlx5e_macsec_fs_del_rule(struct mlx5e_macsec_fs *macsec_fs, - union mlx5e_macsec_rule *macsec_rule, - int action); - -void mlx5e_macsec_fs_get_stats_fill(struct mlx5e_macsec_fs *macsec_fs, void *macsec_stats); - -#endif - -#endif /* __MLX5_MACSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c index e50a2e3f3d18..4559ee16a11a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c @@ -8,14 +8,14 @@ #include "en_accel/macsec.h" static const struct counter_desc mlx5e_macsec_hw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_pkts_drop) }, - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_rx_bytes_drop) }, - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_pkts_drop) }, - { MLX5E_DECLARE_STAT(struct mlx5e_macsec_stats, macsec_tx_bytes_drop) }, + { 
MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_bytes_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_bytes_drop) }, }; #define NUM_MACSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_macsec_hw_stats_desc) @@ -52,6 +52,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw) { + struct mlx5_macsec_fs *macsec_fs; int i; if (!priv->macsec) @@ -60,9 +61,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw) if (!mlx5e_is_macsec_device(priv->mdev)) return idx; - mlx5e_macsec_get_stats_fill(priv->macsec, mlx5e_macsec_get_stats(priv->macsec)); + macsec_fs = priv->mdev->macsec_fs; + mlx5_macsec_fs_get_stats_fill(macsec_fs, mlx5_macsec_fs_get_stats(macsec_fs)); for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5e_macsec_get_stats(priv->macsec), + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_macsec_fs_get_stats(macsec_fs), mlx5e_macsec_hw_stats_desc, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 933a7772a7a3..bb7f86c993e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -135,6 +135,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs); int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) { + /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile + * cleanup_rx callback and it is not recreated when + * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering() + * is not called by the uplink_rep profile init_rx callback. Thus, if + * ntuple is set, moving to switchdev flow will enter this function + * with fs->arfs nullified. 
+ */ + if (!mlx5e_fs_get_arfs(fs)) + return 0; + arfs_del_rules(fs); return arfs_disable(fs); @@ -422,8 +432,10 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) } spin_unlock_bh(&arfs->arfs_lock); hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { - if (arfs_rule->rule) + if (arfs_rule->rule) { mlx5_del_flow_rules(arfs_rule->rule); + priv->channel_stats[arfs_rule->rxq]->rq.arfs_expired++; + } hlist_del(&arfs_rule->hlist); kfree(arfs_rule); } @@ -499,6 +511,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { + priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; err = -ENOMEM; goto out; } @@ -509,6 +522,8 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, ntohs(tuple->etype)); arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); if (!arfs_table) { + WARN_ONCE(1, "arfs table does not exist for etype %u and ip_proto %u\n", + tuple->etype, tuple->ip_proto); err = -EINVAL; goto out; } @@ -590,9 +605,11 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv, dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq); err = mlx5_modify_rule_destination(rule, &dst, NULL); - if (err) + if (err) { + priv->channel_stats[rxq]->rq.arfs_err++; netdev_warn(priv->netdev, "Failed to modify aRFS rule destination to rq=%d\n", rxq); + } } static void arfs_handle_work(struct work_struct *work) @@ -622,6 +639,7 @@ static void arfs_handle_work(struct work_struct *work) if (IS_ERR(rule)) goto out; arfs_rule->rule = rule; + priv->channel_stats[arfs_rule->rxq]->rq.arfs_add++; } else { arfs_modify_rule_rq(priv, arfs_rule->rule, arfs_rule->rxq); @@ -640,8 +658,10 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, struct arfs_tuple *tuple; rule = kzalloc(sizeof(*rule), GFP_ATOMIC); - if (!rule) + if (!rule) { + priv->channel_stats[rxq]->rq.arfs_err++; return NULL; + } rule->priv = priv; rule->rxq = rxq; @@ -730,10 +750,13 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, spin_lock_bh(&arfs->arfs_lock); arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { - if (arfs_rule->rxq == rxq_index) { + if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) { spin_unlock_bh(&arfs->arfs_lock); return arfs_rule->filter_id; } + + priv->channel_stats[rxq_index]->rq.arfs_request_in++; + priv->channel_stats[arfs_rule->rxq]->rq.arfs_request_out++; arfs_rule->rxq = rxq_index; } else { arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 27861b68ced5..dff02434ff45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2061,7 +2061,8 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) struct mlx5e_params new_params; int err; - if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) + if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) || + !MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) return -EOPNOTSUPP; /* Don't allow changing the PTP state if HTB offload is active, because @@ -2163,8 +2164,8 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, - u32 *rule_locs) +static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + 
u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -2181,7 +2182,7 @@ int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); } -int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index aac32e505c14..3eccdadc0357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -96,10 +96,6 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, case UDP_V4_FLOW: case TCP_V6_FLOW: case UDP_V6_FLOW: - max_tuples = ETHTOOL_NUM_L3_L4_FTS; - prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); - eth_ft = &ethtool->l3_l4_ft[prio]; - break; case IP_USER_FLOW: case IPV6_USER_FLOW: max_tuples = ETHTOOL_NUM_L3_L4_FTS; @@ -900,10 +896,16 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { u8 rx_hash_field = 0; + u32 flow_type = 0; + u32 rss_idx = 0; int err; int tt; - tt = flow_type_to_traffic_type(nfc->flow_type); + if (nfc->flow_type & FLOW_RSS) + rss_idx = nfc->rss_context; + + flow_type = flow_type_mask(nfc->flow_type); + tt = flow_type_to_traffic_type(flow_type); if (tt < 0) return tt; @@ -911,10 +913,10 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest * port. */ - if (nfc->flow_type != TCP_V4_FLOW && - nfc->flow_type != TCP_V6_FLOW && - nfc->flow_type != UDP_V4_FLOW && - nfc->flow_type != UDP_V6_FLOW) + if (flow_type != TCP_V4_FLOW && + flow_type != TCP_V6_FLOW && + flow_type != UDP_V4_FLOW && + flow_type != UDP_V6_FLOW) return -EOPNOTSUPP; if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | @@ -931,7 +933,7 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; mutex_lock(&priv->state_lock); - err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field); + err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, rss_idx, tt, rx_hash_field); mutex_unlock(&priv->state_lock); return err; @@ -940,14 +942,23 @@ static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - u32 hash_field = 0; + int hash_field = 0; + u32 flow_type = 0; + u32 rss_idx = 0; int tt; - tt = flow_type_to_traffic_type(nfc->flow_type); + if (nfc->flow_type & FLOW_RSS) + rss_idx = nfc->rss_context; + + flow_type = flow_type_mask(nfc->flow_type); + tt = flow_type_to_traffic_type(flow_type); if (tt < 0) return tt; - hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt); + hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, rss_idx, tt); + if (hash_field < 0) + return hash_field; + nfc->data = 0; if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index defb1efccb78..a2ae791538ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -38,7 +38,7 @@ #include <linux/debugfs.h> #include <linux/if_bridge.h> #include <linux/filter.h> -#include <net/page_pool.h> +#include <net/page_pool/types.h> #include <net/pkt_sched.h> #include <net/xdp_sock_drv.h> 
#include "eswitch.h" @@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s return err; } -static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq) +{ + struct mlx5_cqwq *cqwq = &rq->cq.wq; + struct mlx5_cqe64 *cqe; + + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) { + while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq))) + mlx5_cqwq_pop(cqwq); + } else { + while ((cqe = mlx5_cqwq_get_cqe(cqwq))) + mlx5_cqwq_pop(cqwq); + } + + mlx5_cqwq_update_db_record(cqwq); +} + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) { struct net_device *dev = rq->netdev; int err; @@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); return err; } + + mlx5e_free_rx_descs(rq); + mlx5e_flush_rq_cq(rq); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) { netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); @@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) return 0; } -int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) -{ - mlx5e_free_rx_descs(rq); - - return mlx5e_rq_to_ready(rq, curr_state); -} - static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) { struct mlx5_core_dev *mdev = rq->mdev; @@ -1285,11 +1298,13 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of - * entries of all xmit_modes. - */ + int entries; size_t size; + /* upper bound for maximum num of entries of all xmit_modes. 
*/ + entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS * + MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO); + size = array_size(sizeof(*xdpi_fifo->xi), entries); xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); if (!xdpi_fifo->xi) @@ -1976,7 +1991,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) int eqn; int err; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); + err = mlx5_comp_eqn_get(mdev, param->eq_ix, &eqn); if (err) return err; @@ -2432,14 +2447,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + int cpu = mlx5_comp_vector_get_cpu(priv->mdev, ix); struct net_device *netdev = priv->netdev; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; unsigned int irq; int err; - err = mlx5_vector2irqn(priv->mdev, ix, &irq); + err = mlx5_comp_irqn_get(priv->mdev, ix, &irq); if (err) return err; @@ -2843,13 +2858,13 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev = priv->mdev; int num_comp_vectors, ix, irq; - num_comp_vectors = mlx5_comp_vectors_count(mdev); + num_comp_vectors = mlx5_comp_vectors_max(mdev); for (ix = 0; ix < params->num_channels; ix++) { cpumask_clear(priv->scratchpad.cpumask); for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq)); + int cpu = mlx5_comp_vector_get_cpu(mdev, irq); cpumask_set_cpu(cpu, priv->scratchpad.cpumask); } @@ -4883,9 +4898,6 @@ static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (nla_type(attr) != IFLA_BRIDGE_MODE) continue; - if (nla_len(attr) < sizeof(mode)) - return -EINVAL; - mode = nla_get_u16(attr); if (mode > BRIDGE_MODE_VEPA) return -EINVAL; @@ -5253,6 +5265,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) static int mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_flow_steering *fs; int err; @@ -5281,9 +5294,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5e_health_create_reporters(priv); + + /* If netdev is already registered (e.g. move from uplink to nic profile), + * RTNL lock must be held before triggering netdev notifiers. 
+ */ + if (take_rtnl) + rtnl_lock(); + /* update XDP supported features */ mlx5e_set_xdp_feature(netdev); + if (take_rtnl) + rtnl_unlock(); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 152b62138450..2fdb8895aecd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -399,15 +399,13 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, } static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, - struct mlx5_devcom *devcom, struct mlx5e_rep_sq *rep_sq, int i) { - struct mlx5_eswitch *peer_esw = NULL; struct mlx5_flow_handle *flow_rule; - int tmp; + struct mlx5_devcom_comp_dev *tmp; + struct mlx5_eswitch *peer_esw; - mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS, - peer_esw, tmp) { + mlx5_devcom_for_each_peer_entry(esw->devcom, peer_esw, tmp) { u16 peer_rule_idx = MLX5_CAP_GEN(peer_esw->dev, vhca_id); struct mlx5e_rep_sq_peer *sq_peer; int err; @@ -443,7 +441,6 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_flow_handle *flow_rule; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; - struct mlx5_devcom *devcom; bool devcom_locked = false; int err; int i; @@ -451,10 +448,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; - devcom = esw->dev->priv.devcom; rpriv = mlx5e_rep_to_rep_priv(rep); - if (mlx5_devcom_comp_is_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS) && - mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + + if (mlx5_devcom_comp_is_ready(esw->devcom) && + mlx5_devcom_for_each_peer_begin(esw->devcom)) devcom_locked = true; for (i = 0; i < sqns_num; i++) { @@ -477,7 +474,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, xa_init(&rep_sq->sq_peer); if (devcom_locked) { - err = mlx5e_sqs2vport_add_peers_rules(esw, rep, devcom, rep_sq, i); + err = mlx5e_sqs2vport_add_peers_rules(esw, rep, rep_sq, i); if (err) { mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); xa_destroy(&rep_sq->sq_peer); @@ -490,7 +487,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, } if (devcom_locked) - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(esw->devcom); return 0; @@ -498,7 +495,7 @@ out_err: mlx5e_sqs2vport_stop(esw, rep); if (devcom_locked) - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(esw->devcom); return err; } @@ -1012,7 +1009,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err = mlx5e_open_drop_rq(priv, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - return err; + goto err_rx_res_free; } err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, @@ -1046,6 +1043,7 @@ err_destroy_rx_res: mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); +err_rx_res_free: mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; err_free_fs: @@ -1159,6 +1157,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_neigh_init; + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_init_uplink_rep_tx(rpriv); if (err) @@ -1175,6 +1177,8 @@ err_ht_init: if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); err_init_tx: + mlx5e_rep_neigh_cleanup(rpriv); +err_neigh_init: 
mlx5e_destroy_tises(priv); return err; } @@ -1188,22 +1192,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_destroy_tises(priv); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - mlx5e_set_netdev_mtu_boundaries(priv); - mlx5e_rep_neigh_init(rpriv); } static void mlx5e_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - - mlx5e_rep_neigh_cleanup(rpriv); } static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) @@ -1253,7 +1252,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; u16 max_mtu; @@ -1275,7 +1273,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); - mlx5e_rep_neigh_init(rpriv); mlx5e_rep_bridge_init(priv); netdev->wanted_features |= NETIF_F_HW_TC; @@ -1290,7 +1287,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; rtnl_lock(); @@ -1300,7 +1296,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) rtnl_unlock(); mlx5e_rep_bridge_cleanup(priv); - mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); @@ -1341,6 +1336,7 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), #ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_hw), &MLX5E_STATS_GRP(ipsec_sw), #endif &MLX5E_STATS_GRP(ptp), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 41d37159e027..3fd11b0761e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,7 +36,7 @@ #include <linux/bitmap.h> #include <linux/filter.h> #include <net/ip6_checksum.h> -#include <net/page_pool.h> +#include <net/page_pool/helpers.h> #include <net/inet_ecn.h> #include <net/gro.h> #include <net/udp.h> @@ -1543,7 +1543,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) - mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe); + mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, + be32_to_cpu(cqe->ft_metadata)); if (unlikely(mlx5e_macsec_is_rx_flow(cqe))) mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 4d77055abd4b..4b96ad657145 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -38,7 +38,7 @@ #include "en/port.h" #ifdef CONFIG_PAGE_POOL_STATS -#include <net/page_pool.h> +#include <net/page_pool/helpers.h> #endif static unsigned int stats_grps_num(struct mlx5e_priv *priv) @@ -180,7 +180,13 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, +#ifdef CONFIG_MLX5_EN_ARFS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_add) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_request_in) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_request_out) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_expired) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, +#endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, #ifdef CONFIG_PAGE_POOL_STATS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) }, @@ -231,7 +237,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, @@ -321,7 +326,6 @@ static void mlx5e_stats_grp_sw_update_stats_xskrq(struct mlx5e_sw_stats *s, s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; s->rx_xsk_congst_umr += xskrq_stats->congst_umr; - s->rx_xsk_arfs_err += xskrq_stats->arfs_err; } static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, @@ -354,7 +358,13 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; s->rx_congst_umr += rq_stats->congst_umr; +#ifdef CONFIG_MLX5_EN_ARFS + s->rx_arfs_add += rq_stats->arfs_add; + s->rx_arfs_request_in += rq_stats->arfs_request_in; + s->rx_arfs_request_out += rq_stats->arfs_request_out; + s->rx_arfs_expired += rq_stats->arfs_expired; s->rx_arfs_err += rq_stats->arfs_err; +#endif s->rx_recover += rq_stats->recover; #ifdef CONFIG_PAGE_POOL_STATS s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast; @@ -1990,7 +2000,13 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, +#ifdef CONFIG_MLX5_EN_ARFS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_add) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_request_in) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_request_out) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_expired) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, +#endif { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, #ifdef CONFIG_PAGE_POOL_STATS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) }, @@ -2092,7 +2108,6 @@ static const struct counter_desc xskrq_stats_desc[] = { { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, - { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) }, }; static const struct counter_desc xsksq_stats_desc[] = { @@ -2142,9 +2157,7 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct 
mlx5e_ptp_cq_stats, err_cqe) }, { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) }, { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, - { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) }, - { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) }, - { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, ooo_cqe_drop) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, late_cqe) }, }; static const struct counter_desc ptp_rq_stats_desc[] = { @@ -2170,7 +2183,6 @@ static const struct counter_desc ptp_rq_stats_desc[] = { { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, - { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, }; @@ -2490,7 +2502,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { &MLX5E_STATS_GRP(per_port_buff_congest), &MLX5E_STATS_GRP(ptp), &MLX5E_STATS_GRP(qos), -#ifdef CONFIG_MLX5_EN_MACSEC +#ifdef CONFIG_MLX5_MACSEC &MLX5E_STATS_GRP(macsec_hw), #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 1ff8a06027dc..176fa5976259 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -194,7 +194,13 @@ struct mlx5e_sw_stats { u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; u64 rx_congst_umr; +#ifdef CONFIG_MLX5_EN_ARFS + u64 rx_arfs_add; + u64 rx_arfs_request_in; + u64 rx_arfs_request_out; + u64 rx_arfs_expired; u64 rx_arfs_err; +#endif u64 rx_recover; u64 ch_events; u64 ch_poll; @@ -256,7 +262,6 @@ struct mlx5e_sw_stats { u64 rx_xsk_cqe_compress_blks; u64 rx_xsk_cqe_compress_pkts; u64 rx_xsk_congst_umr; - u64 rx_xsk_arfs_err; u64 tx_xsk_xmit; u64 tx_xsk_mpwqe; u64 tx_xsk_inlnw; @@ -358,7 +363,13 @@ struct mlx5e_rq_stats { u64 cqe_compress_blks; u64 cqe_compress_pkts; u64 congst_umr; +#ifdef CONFIG_MLX5_EN_ARFS + u64 arfs_add; + u64 arfs_request_in; + u64 arfs_request_out; + u64 arfs_expired; u64 arfs_err; +#endif u64 recover; #ifdef CONFIG_PAGE_POOL_STATS u64 pp_alloc_fast; @@ -449,9 +460,7 @@ struct mlx5e_ptp_cq_stats { u64 err_cqe; u64 abort; u64 abort_abs_diff_ns; - u64 resync_cqe; - u64 resync_event; - u64 ooo_cqe_drop; + u64 late_cqe; }; struct mlx5e_rep_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8d0a3f69693e..318083690fcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1668,11 +1668,10 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro { struct mlx5e_priv *out_priv, *route_priv; struct mlx5_core_dev *route_mdev; - struct mlx5_devcom *devcom; + struct mlx5_devcom_comp_dev *pos; struct mlx5_eswitch *esw; u16 vhca_id; int err; - int i; out_priv = netdev_priv(out_dev); esw = out_priv->mdev->priv.eswitch; @@ -1688,10 +1687,8 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro return err; rcu_read_lock(); - devcom = out_priv->mdev->priv.devcom; err = -ENODEV; - mlx5_devcom_for_each_peer_entry_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS, - esw, i) { + mlx5_devcom_for_each_peer_entry_rcu(esw->devcom, esw, pos) { err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); if (!err) break; @@ -1725,6 +1722,19 @@ 
verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) return 0; } +static bool +has_encap_dests(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int out_index; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + return true; + + return false; +} + static int post_process_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, @@ -1737,9 +1747,11 @@ post_process_attr(struct mlx5e_tc_flow *flow, if (err) goto err_out; - err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); - if (err) - goto err_out; + if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) { + err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); + if (err) + goto err_out; + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr); @@ -1928,9 +1940,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_flow_attr *attr = flow->attr; - struct mlx5_esw_flow_attr *esw_attr; - esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); remove_unready_flow(flow); @@ -1951,12 +1961,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); - if (esw_attr->int_port) - mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port); - - if (esw_attr->dest_int_port) - mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port); - if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); @@ -2031,15 +2035,15 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { if (mlx5e_is_eswitch_flow(flow)) { - struct mlx5_devcom *devcom = flow->priv->mdev->priv.devcom; + struct mlx5_devcom_comp_dev *devcom = flow->priv->mdev->priv.eswitch->devcom; - if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { + if (!mlx5_devcom_for_each_peer_begin(devcom)) { mlx5e_tc_del_fdb_flow(priv, flow); return; } mlx5e_tc_del_fdb_peers_flow(flow); - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom); mlx5e_tc_del_fdb_flow(priv, flow); } else { mlx5e_tc_del_nic_flow(priv, flow); @@ -2593,29 +2597,29 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, match_level = outer_match_level; if (dissector->used_keys & - ~(BIT(FLOW_DISSECTOR_KEY_META) | - BIT(FLOW_DISSECTOR_KEY_CONTROL) | - BIT(FLOW_DISSECTOR_KEY_BASIC) | - BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_VLAN) | - BIT(FLOW_DISSECTOR_KEY_CVLAN) | - BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS) | - BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | - BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | - BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | - BIT(FLOW_DISSECTOR_KEY_TCP) | - BIT(FLOW_DISSECTOR_KEY_IP) | - BIT(FLOW_DISSECTOR_KEY_CT) | - BIT(FLOW_DISSECTOR_KEY_ENC_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | - BIT(FLOW_DISSECTOR_KEY_ICMP) | - BIT(FLOW_DISSECTOR_KEY_MPLS))) { + ~(BIT_ULL(FLOW_DISSECTOR_KEY_META) | + BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | + BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) | + BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + 
BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_TCP) | + BIT_ULL(FLOW_DISSECTOR_KEY_IP) | + BIT_ULL(FLOW_DISSECTOR_KEY_CT) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ICMP) | + BIT_ULL(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); - netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", + netdev_dbg(priv->netdev, "Unsupported key used: 0x%llx\n", dissector->used_keys); return -EOPNOTSUPP; } @@ -4216,8 +4220,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); - bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS); + bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.eswitch->devcom); if (!esw_paired) return false; @@ -4253,6 +4256,7 @@ static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); + struct mlx5_esw_flow_attr *esw_attr; if (!attr) return; @@ -4270,6 +4274,18 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr); } + if (mlx5e_is_eswitch_flow(flow)) { + esw_attr = attr->esw_attr; + + if (esw_attr->int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv), + esw_attr->int_port); + + if (esw_attr->dest_int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv), + esw_attr->dest_int_port); + } + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); free_branch_attr(flow, attr->branch_true); @@ -4471,14 +4487,13 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { - struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct mlx5_devcom_comp_dev *devcom = priv->mdev->priv.eswitch->devcom, *pos; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *in_rep = rpriv->rep; struct mlx5_core_dev *in_mdev = priv->mdev; struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; int err; - int i; flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, in_mdev); @@ -4490,27 +4505,25 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return 0; } - if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { + if (!mlx5_devcom_for_each_peer_begin(devcom)) { err = -ENODEV; goto clean_flow; } - mlx5_devcom_for_each_peer_entry(devcom, - MLX5_DEVCOM_ESW_OFFLOADS, - peer_esw, i) { + mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); if (err) goto peer_clean; } - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom); *__flow = flow; return 0; peer_clean: mlx5e_tc_del_fdb_peers_flow(flow); - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom); clean_flow: mlx5e_tc_del_fdb_flow(priv, flow); return err; @@ -4613,6 +4626,46 @@ static bool is_flow_rule_duplicate_allowed(struct net_device *dev, return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK; 
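The BIT() to BIT_ULL() conversion, and the matching %x to %llx in the netdev_dbg format, follow from used_keys widening to 64 bits: BIT(n) expands to (1UL << n), which on 32-bit kernels cannot represent flow-dissector keys numbered 32 or higher, while BIT_ULL(n) is (1ULL << n) and always yields a 64-bit mask. A standalone demonstration:

	#include <stdio.h>

	#define BIT(n)     (1UL << (n))	/* width of unsigned long: 32 on ILP32 */
	#define BIT_ULL(n) (1ULL << (n))	/* always 64 bits wide */

	int main(void)
	{
		/* On an ILP32 build, BIT(40) would be undefined behaviour;
		 * BIT_ULL(40) is a well-defined 64-bit mask everywhere. */
		printf("BIT_ULL(40) = 0x%llx\n", BIT_ULL(40));
		return 0;
	}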
} +/* As IPsec and TC order is not aligned between software and hardware-offload, + * either IPsec offload or TC offload, not both, is allowed for a specific interface. + */ +static bool is_tc_ipsec_order_check_needed(struct net_device *filter, struct mlx5e_priv *priv) +{ + if (!IS_ENABLED(CONFIG_MLX5_EN_IPSEC)) + return false; + + if (filter != priv->netdev) + return false; + + if (mlx5e_eswitch_vf_rep(priv->netdev)) + return false; + + return true; +} + +static int mlx5e_tc_block_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!is_tc_ipsec_order_check_needed(filter, priv)) + return 0; + + if (mdev->num_block_tc) + return -EBUSY; + + mdev->num_block_ipsec++; + + return 0; +} + +static void mlx5e_tc_unblock_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv) +{ + if (!is_tc_ipsec_order_check_needed(filter, priv)) + return; + + priv->mdev->num_block_ipsec--; +} + int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags) { @@ -4625,6 +4678,10 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, if (!mlx5_esw_hold(priv->mdev)) return -EBUSY; + err = mlx5e_tc_block_ipsec_offload(dev, priv); + if (err) + goto esw_release; + mlx5_esw_get(priv->mdev); rcu_read_lock(); @@ -4670,7 +4727,9 @@ rcu_unlock: err_free: mlx5e_flow_put(priv, flow); out: + mlx5e_tc_unblock_ipsec_offload(dev, priv); mlx5_esw_put(priv->mdev); +esw_release: mlx5_esw_release(priv->mdev); return err; } @@ -4711,6 +4770,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, trace_mlx5e_delete_flower(f); mlx5e_flow_put(priv, flow); + mlx5e_tc_unblock_ipsec_offload(dev, priv); mlx5_esw_put(priv->mdev); return 0; @@ -4728,7 +4788,7 @@ int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv, int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags) { - struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; @@ -4764,7 +4824,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, /* Under multipath it's possible for one rule to be currently * un-offloaded while the other rule is offloaded. 
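mlx5e_tc_block_ipsec_offload() above is one half of a two-counter exclusion scheme: TC refuses to install when num_block_tc is raised (by active IPsec state) and raises num_block_ipsec for the lifetime of its own rules; the IPsec side is expected to perform the mirror-image check, though that half is not part of these hunks. A self-contained illustration of the invariant (field names are illustrative, not the mdev members):

	#include <errno.h>
	#include <stdio.h>

	static int num_block_tc;	/* raised by IPsec while SAs exist */
	static int num_block_ipsec;	/* raised by TC while rules exist  */

	static int add_tc_rule(void)
	{
		if (num_block_tc)
			return -EBUSY;	/* IPsec got there first */
		num_block_ipsec++;
		return 0;
	}

	static int add_ipsec_state(void)
	{
		if (num_block_ipsec)
			return -EBUSY;	/* TC got there first */
		num_block_tc++;
		return 0;
	}

	int main(void)
	{
		int a = add_tc_rule();		/* 0: TC wins the race */
		int b = add_ipsec_state();	/* -EBUSY: blocked by TC */

		printf("tc=%d ipsec=%d\n", a, b);
		return 0;
	}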
*/ - if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + if (esw && !mlx5_devcom_for_each_peer_begin(esw->devcom)) goto out; if (flow_flag_test(flow, DUP)) { @@ -4795,7 +4855,8 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, } no_peer_counter: - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (esw) + mlx5_devcom_for_each_peer_end(esw->devcom); out: flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); @@ -5200,11 +5261,12 @@ void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) { const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); + struct netdev_phys_item_id ppid; struct mlx5e_rep_priv *rpriv; struct mapping_ctx *mapping; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - u64 mapping_id; + u64 mapping_id, key; int err = 0; rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); @@ -5258,7 +5320,11 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_action_counter; } - mlx5_esw_offloads_devcom_init(esw); + err = dev_get_port_parent_id(priv->netdev, &ppid, false); + if (!err) { + memcpy(&key, &ppid.id, sizeof(key)); + mlx5_esw_offloads_devcom_init(esw, key); + } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index c7eb6b238c2b..d41435c22ce5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -372,7 +372,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma, struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg, - bool xmit_more) + struct mlx5_wqe_eth_seg *eseg, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; bool send_doorbell; @@ -394,11 +394,16 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_tx_check_stop(sq); - if (unlikely(sq->ptpsq)) { + if (unlikely(sq->ptpsq && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) { + u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata); + mlx5e_skb_cb_hwtstamp_init(skb); - mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb); + mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); + mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, + metadata_index); if (!netif_tx_queue_stopped(sq->txq) && - !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) { + mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { netif_tx_stop_queue(sq->txq); sq->stats->stopped++; } @@ -483,13 +488,16 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more); + mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, eseg, xmit_more); return; err_drop: stats->dropped++; dev_kfree_skb_any(skb); + if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) + mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist, + be32_to_cpu(eseg->flow_table_metadata)); mlx5e_tx_flush(sq); } @@ -645,9 +653,9 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - if (ptpsq->ts_cqe_ctr_mask && unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) - eseg->flow_table_metadata = cpu_to_be32(ptpsq->skb_fifo_pc & - ptpsq->ts_cqe_ctr_mask); 
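In mlx5e_tc_esw_init() above, the devcom pairing key is now derived from the netdev's port parent ID instead of being implied by a component type, and only sizeof(u64) bytes of the (up to 32-byte) ID feed the key. A small sketch of that derivation, with an illustrative struct standing in for netdev_phys_item_id:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct item_id { uint8_t id[32]; uint8_t id_len; };	/* stand-in type */

	static uint64_t ppid_to_devcom_key(const struct item_id *ppid)
	{
		uint64_t key;

		memcpy(&key, ppid->id, sizeof(key));	/* first 8 bytes only */
		return key;
	}

	int main(void)
	{
		struct item_id ppid = { .id = { 0xde, 0xad, 0xbe, 0xef }, .id_len = 8 };

		printf("key=%llx\n", (unsigned long long)ppid_to_devcom_key(&ppid));
		return 0;
	}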
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + eseg->flow_table_metadata = + cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist)); } static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, @@ -766,7 +774,7 @@ void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq) { if (netif_tx_queue_stopped(sq->txq) && mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && - mlx5e_ptpsq_fifo_has_room(sq) && + !mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq) && !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); sq->stats->wake++; @@ -1031,7 +1039,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (unlikely(num_dma < 0)) goto err_drop; - mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more); + mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, eseg, xmit_more); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 3db4866d7880..ea0405e0a43f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -47,7 +47,7 @@ enum { static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); struct mlx5_eq_table { - struct list_head comp_eqs_list; + struct xarray comp_eqs; struct mlx5_eq_async pages_eq; struct mlx5_eq_async cmd_eq; struct mlx5_eq_async async_eq; @@ -58,11 +58,14 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_eqs; + struct mutex comp_lock; /* sync comp eqs creations */ + int curr_comp_eqs; + int max_comp_eqs; struct mlx5_irq_table *irq_table; - struct mlx5_irq **comp_irqs; + struct xarray comp_irqs; struct mlx5_irq *ctrl_irq; struct cpu_rmap *rmap; + struct cpumask used_cpus; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -452,13 +455,22 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); eq_table->irq_table = mlx5_irq_table_get(dev); + cpumask_clear(&eq_table->used_cpus); + xa_init(&eq_table->comp_eqs); + xa_init(&eq_table->comp_irqs); + mutex_init(&eq_table->comp_lock); + eq_table->curr_comp_eqs = 0; return 0; } void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *table = dev->priv.eq_table; + mlx5_eq_debugfs_cleanup(dev); - kvfree(dev->priv.eq_table); + xa_destroy(&table->comp_irqs); + xa_destroy(&table->comp_eqs); + kvfree(table); } /* Async EQs */ @@ -803,88 +815,112 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -static void comp_irqs_release_pci(struct mlx5_core_dev *dev) +static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq *irq; + + irq = xa_load(&table->comp_irqs, vecidx); + if (!irq) + return; - mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); + xa_erase(&table->comp_irqs, vecidx); + mlx5_irq_release_vector(irq); } -static int comp_irqs_request_pci(struct mlx5_core_dev *dev) +static int mlx5_cpumask_default_spread(int numa_node, int index) { - struct mlx5_eq_table *table = dev->priv.eq_table; const struct cpumask *prev = cpu_none_mask; const struct cpumask *mask; - int ncomp_eqs; - u16 *cpus; - int ret; + int found_cpu = 0; + int i = 0; int cpu; - int i; - - ncomp_eqs = table->num_comp_eqs; - cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); - if (!cpus) - return -ENOMEM; - i = 0; rcu_read_lock(); - 
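These TX hunks replace the PTP skb fifo with a freelist of metadata slot indexes: mlx5e_cqe_ts_id_eseg() pops a free index into eseg->flow_table_metadata, the drop path pushes it straight back, and the queue is stopped while the freelist is empty. A minimal power-of-two ring that behaves like such a freelist (hypothetical layout, not the driver's struct):

	#include <stdint.h>

	struct md_fifo {
		uint8_t *data;
		uint16_t mask;	/* capacity - 1, capacity a power of two */
		uint16_t pc;	/* producer counter (push side) */
		uint16_t cc;	/* consumer counter (pop side)  */
	};

	static int md_fifo_empty(const struct md_fifo *f)
	{
		return f->pc == f->cc;
	}

	static void md_fifo_push(struct md_fifo *f, uint8_t id)
	{
		f->data[f->pc++ & f->mask] = id;
	}

	static uint8_t md_fifo_pop(struct md_fifo *f)
	{
		return f->data[f->cc++ & f->mask];
	}

	int main(void)
	{
		uint8_t slots[4];
		struct md_fifo f = { slots, 3, 0, 0 };

		for (uint8_t i = 0; i < 4; i++)
			md_fifo_push(&f, i);	/* seed: all slots start free */
		return md_fifo_empty(&f) ? 1 : md_fifo_pop(&f);	/* pops 0 */
	}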
for_each_numa_hop_mask(mask, dev->priv.numa_node) { + for_each_numa_hop_mask(mask, numa_node) { for_each_cpu_andnot(cpu, mask, prev) { - cpus[i] = cpu; - if (++i == ncomp_eqs) + if (i++ == index) { + found_cpu = cpu; goto spread_done; + } } prev = mask; } + spread_done: rcu_read_unlock(); - ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap); - kfree(cpus); - return ret; + return found_cpu; } -static void comp_irqs_release_sf(struct mlx5_core_dev *dev) +static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; - - mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); +#ifdef CONFIG_RFS_ACCEL +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->priv.eq_table->rmap; +#endif + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif } -static int comp_irqs_request_sf(struct mlx5_core_dev *dev) +static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; - int ncomp_eqs = table->num_comp_eqs; + struct cpu_rmap *rmap; + struct mlx5_irq *irq; + int cpu; + + rmap = mlx5_eq_table_get_pci_rmap(dev); + cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); + irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); + if (IS_ERR(irq)) + return PTR_ERR(irq); - return mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } -static void comp_irqs_release(struct mlx5_core_dev *dev) +static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq *irq; - mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) : - comp_irqs_release_pci(dev); + irq = xa_load(&table->comp_irqs, vecidx); + if (!irq) + return; - kfree(table->comp_irqs); + xa_erase(&table->comp_irqs, vecidx); + mlx5_irq_affinity_irq_release(dev, irq); } -static int comp_irqs_request(struct mlx5_core_dev *dev) +static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; - int ncomp_eqs; - int ret; + struct mlx5_irq *irq; - ncomp_eqs = table->num_comp_eqs; - table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); - if (!table->comp_irqs) - return -ENOMEM; + irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx); + if (IS_ERR(irq)) { + /* In case SF irq pool does not exist, fallback to the PF irqs*/ + if (PTR_ERR(irq) == -ENOENT) + return comp_irq_request_pci(dev, vecidx); + + return PTR_ERR(irq); + } + + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); +} - ret = mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) : - comp_irqs_request_pci(dev); - if (ret < 0) - kfree(table->comp_irqs); +static void comp_irq_release(struct mlx5_core_dev *dev, u16 vecidx) +{ + mlx5_core_is_sf(dev) ? comp_irq_release_sf(dev, vecidx) : + comp_irq_release_pci(dev, vecidx); +} - return ret; +static int comp_irq_request(struct mlx5_core_dev *dev, u16 vecidx) +{ + return mlx5_core_is_sf(dev) ? 
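Put differently: mlx5_cpumask_default_spread() enumerates CPUs hop by hop outward from the device's NUMA node and returns the index-th one it meets, so completion vector 0 is steered to the closest CPU, vector 1 to the next closest, and vectors only spill onto remote nodes once the local node is exhausted; if index runs past the last CPU, the function falls back to CPU 0 (found_cpu's initial value).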
comp_irq_request_sf(dev, vecidx) : + comp_irq_request_pci(dev, vecidx); } #ifdef CONFIG_RFS_ACCEL @@ -901,7 +937,7 @@ static int alloc_rmap(struct mlx5_core_dev *mdev) if (mlx5_core_is_sf(mdev)) return 0; - eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs); + eq_table->rmap = alloc_irq_cpu_rmap(eq_table->max_comp_eqs); if (!eq_table->rmap) return -ENOMEM; return 0; @@ -921,22 +957,19 @@ static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; } static void free_rmap(struct mlx5_core_dev *mdev) {} #endif -static void destroy_comp_eqs(struct mlx5_core_dev *dev) +static void destroy_comp_eq(struct mlx5_core_dev *dev, struct mlx5_eq_comp *eq, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq_comp *eq, *n; - - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - list_del(&eq->list); - mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); - if (destroy_unmap_eq(dev, &eq->core)) - mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", - eq->core.eqn); - tasklet_disable(&eq->tasklet_ctx.task); - kfree(eq); - } - comp_irqs_release(dev); - free_rmap(dev); + + xa_erase(&table->comp_eqs, vecidx); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + if (destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); + kfree(eq); + comp_irq_release(dev, vecidx); + table->curr_comp_eqs--; } static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) @@ -954,129 +987,149 @@ static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) return MLX5_COMP_EQ_SIZE; } -static int create_comp_eqs(struct mlx5_core_dev *dev) +/* Must be called with EQ table comp_lock held */ +static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; struct mlx5_eq_comp *eq; - int ncomp_eqs; + struct mlx5_irq *irq; int nent; int err; - int i; - err = alloc_rmap(dev); + lockdep_assert_held(&table->comp_lock); + if (table->curr_comp_eqs == table->max_comp_eqs) { + mlx5_core_err(dev, "maximum number of vectors is allocated, %d\n", + table->max_comp_eqs); + return -ENOMEM; + } + + err = comp_irq_request(dev, vecidx); if (err) return err; - ncomp_eqs = comp_irqs_request(dev); - if (ncomp_eqs < 0) { - err = ncomp_eqs; - goto err_irqs_req; - } - - INIT_LIST_HEAD(&table->comp_eqs_list); nent = comp_eq_depth_devlink_param_get(dev); - for (i = 0; i < ncomp_eqs; i++) { - struct mlx5_eq_param param = {}; + eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); + if (!eq) { + err = -ENOMEM; + goto clean_irq; + } - eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); - if (!eq) { - err = -ENOMEM; - goto clean; - } + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); - - eq->irq_nb.notifier_call = mlx5_eq_comp_int; - param = (struct mlx5_eq_param) { - .irq = table->comp_irqs[i], - .nent = nent, - }; - - err = create_map_eq(dev, &eq->core, ¶m); - if (err) - goto clean_eq; - err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); - if (err) { - destroy_unmap_eq(dev, &eq->core); - goto clean_eq; - } + irq = xa_load(&table->comp_irqs, vecidx); + 
eq->irq_nb.notifier_call = mlx5_eq_comp_int; + param = (struct mlx5_eq_param) { + .irq = irq, + .nent = nent, + }; - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); - /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ - list_add_tail(&eq->list, &table->comp_eqs_list); + err = create_map_eq(dev, &eq->core, ¶m); + if (err) + goto clean_eq; + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + destroy_unmap_eq(dev, &eq->core); + goto clean_eq; } - table->num_comp_eqs = ncomp_eqs; - return 0; + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); + err = xa_err(xa_store(&table->comp_eqs, vecidx, eq, GFP_KERNEL)); + if (err) + goto disable_eq; + + table->curr_comp_eqs++; + return eq->core.eqn; +disable_eq: + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); clean_eq: kfree(eq); -clean: - destroy_comp_eqs(dev); -err_irqs_req: - free_rmap(dev); +clean_irq: + comp_irq_release(dev, vecidx); return err; } -static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) +int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; - int err = -ENOENT; - int i = 0; + int ret = 0; - list_for_each_entry(eq, &table->comp_eqs_list, list) { - if (i++ == vector) { - if (irqn) - *irqn = eq->core.irqn; - if (eqn) - *eqn = eq->core.eqn; - err = 0; - break; - } + mutex_lock(&table->comp_lock); + eq = xa_load(&table->comp_eqs, vecidx); + if (eq) { + *eqn = eq->core.eqn; + goto out; } - return err; -} + ret = create_comp_eq(dev, vecidx); + if (ret < 0) { + mutex_unlock(&table->comp_lock); + return ret; + } -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) -{ - return vector2eqnirqn(dev, vector, eqn, NULL); + *eqn = ret; +out: + mutex_unlock(&table->comp_lock); + return 0; } -EXPORT_SYMBOL(mlx5_vector2eqn); +EXPORT_SYMBOL(mlx5_comp_eqn_get); -int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) { - return vector2eqnirqn(dev, vector, NULL, irqn); + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + int eqn; + int err; + + /* Allocate the EQ if not allocated yet */ + err = mlx5_comp_eqn_get(dev, vector, &eqn); + if (err) + return err; + + eq = xa_load(&table->comp_eqs, vector); + *irqn = eq->core.irqn; + return 0; } -unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) +unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_eqs; + return dev->priv.eq_table->max_comp_eqs; } -EXPORT_SYMBOL(mlx5_comp_vectors_count); +EXPORT_SYMBOL(mlx5_comp_vectors_max); -struct cpumask * +static struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; - int i = 0; - list_for_each_entry(eq, &table->comp_eqs_list, list) { - if (i++ == vector) - return mlx5_irq_get_affinity_mask(eq->core.irq); - } + eq = xa_load(&table->comp_eqs, vector); + if (eq) + return mlx5_irq_get_affinity_mask(eq->core.irq); - WARN_ON_ONCE(1); return NULL; } -EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) +{ + struct cpumask *mask; + int cpu; + + mask = mlx5_comp_irq_get_affinity_mask(dev, vector); + if (mask) + cpu = cpumask_first(mask); + else + cpu = 
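With this change completion EQs are created on demand: the first mlx5_comp_eqn_get() call for a given vector takes comp_lock, requests the IRQ, and creates the EQ, while later calls (including mlx5_comp_irqn_get(), which piggybacks on it) just read the cached xarray entry. A hedged sketch of a consumer, using a hypothetical CQ-setup path:

	static int setup_cq_on_vector(struct mlx5_core_dev *dev, u16 vecidx)
	{
		unsigned int irqn;
		int eqn, err;

		err = mlx5_comp_eqn_get(dev, vecidx, &eqn);	/* creates EQ on first use */
		if (err)
			return err;

		err = mlx5_comp_irqn_get(dev, vecidx, &irqn);	/* EQ is cached by now */
		if (err)
			return err;

		/* eqn would feed CREATE_CQ; irqn could seed an affinity hint
		 * (both uses illustrative, not shown in this diff). */
		return 0;
	}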
mlx5_cpumask_default_spread(dev->priv.numa_node, vector); + + return cpu; +} +EXPORT_SYMBOL(mlx5_comp_vector_get_cpu); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) @@ -1089,11 +1142,11 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; + unsigned long index; - list_for_each_entry(eq, &table->comp_eqs_list, list) { + xa_for_each(&table->comp_eqs, index, eq) if (eq->core.eqn == eqn) return eq; - } return ERR_PTR(-ENOENT); } @@ -1101,11 +1154,7 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; - - mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ mlx5_irq_table_free_irqs(dev); - mutex_unlock(&table->lock); } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -1148,22 +1197,22 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - eq_table->num_comp_eqs = get_num_eqs(dev); + eq_table->max_comp_eqs = get_num_eqs(dev); err = create_async_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create async EQs\n"); goto err_async_eqs; } - err = create_comp_eqs(dev); + err = alloc_rmap(dev); if (err) { - mlx5_core_err(dev, "Failed to create completion EQs\n"); - goto err_comp_eqs; + mlx5_core_err(dev, "Failed to allocate rmap\n"); + goto err_rmap; } return 0; -err_comp_eqs: +err_rmap: destroy_async_eqs(dev); err_async_eqs: return err; @@ -1171,7 +1220,14 @@ err_async_eqs: void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { - destroy_comp_eqs(dev); + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + unsigned long index; + + xa_for_each(&table->comp_eqs, index, eq) + destroy_comp_eq(dev, eq, index); + + free_rmap(dev); destroy_async_eqs(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index f4fe1daa4afd..e36294b7ade2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -652,30 +652,30 @@ mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, struct mlx5_esw_bridge *bridge) { - struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom; + struct mlx5_devcom_comp_dev *devcom = bridge->br_offloads->esw->devcom, *pos; struct mlx5_eswitch *tmp, *peer_esw = NULL; static struct mlx5_flow_handle *handle; - int i; - if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + if (!mlx5_devcom_for_each_peer_begin(devcom)) return ERR_PTR(-ENODEV); - mlx5_devcom_for_each_peer_entry(devcom, - MLX5_DEVCOM_ESW_OFFLOADS, - tmp, i) { + mlx5_devcom_for_each_peer_entry(devcom, tmp, pos) { if (mlx5_esw_is_owner(tmp, vport_num, esw_owner_vhca_id)) { peer_esw = tmp; break; } } + if (!peer_esw) { - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - return ERR_PTR(-ENODEV); + handle = ERR_PTR(-ENODEV); + goto out; } handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, bridge, peer_esw); - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + +out: + mlx5_devcom_for_each_peer_end(devcom); return handle; } @@ -1391,8 +1391,8 @@ 
mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_ow mlx5_fc_id(counter), bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); - esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n", - vport_num, err); + esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d,peer=%d)\n", + vport_num, err, peer); goto err_ingress_flow_create; } entry->ingress_handle = handle; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c index b6a45eff28f5..dbd7cbe6cbf3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c @@ -64,7 +64,7 @@ void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_ bridge->debugfs_dir = debugfs_create_dir(br_netdev->name, bridge->br_offloads->debugfs_root); - debugfs_create_file("fdb", 0444, bridge->debugfs_dir, bridge, + debugfs_create_file("fdb", 0400, bridge->debugfs_dir, bridge, &mlx5_esw_bridge_debugfs_fops); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c index 2455f8b93c1e..7a01714b3780 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -539,30 +539,29 @@ mlx5_esw_bridge_mcast_filter_flow_create(struct mlx5_esw_bridge_port *port) static struct mlx5_flow_handle * mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port) { - struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom; + struct mlx5_devcom_comp_dev *devcom = port->bridge->br_offloads->esw->devcom, *pos; struct mlx5_eswitch *tmp, *peer_esw = NULL; static struct mlx5_flow_handle *handle; - int i; - if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + if (!mlx5_devcom_for_each_peer_begin(devcom)) return ERR_PTR(-ENODEV); - mlx5_devcom_for_each_peer_entry(devcom, - MLX5_DEVCOM_ESW_OFFLOADS, - tmp, i) { + mlx5_devcom_for_each_peer_entry(devcom, tmp, pos) { if (mlx5_esw_is_owner(tmp, port->vport_num, port->esw_owner_vhca_id)) { peer_esw = tmp; break; } } + if (!peer_esw) { - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - return ERR_PTR(-ENODEV); + handle = ERR_PTR(-ENODEV); + goto out; } handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw); - mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +out: + mlx5_devcom_for_each_peer_end(devcom); return handle; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index af779c700278..d8e739cbcbce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -16,39 +16,28 @@ mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_i static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num) { - return vport_num == MLX5_VPORT_UPLINK || - (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || + return (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_core_is_ec_vf_vport(esw->dev, vport_num); } -static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) +static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch *esw, + u16 
vport_num, + struct devlink_port *dl_port) { struct mlx5_core_dev *dev = esw->dev; - struct devlink_port_attrs attrs = {}; struct netdev_phys_item_id ppid = {}; - struct devlink_port *dl_port; u32 controller_num = 0; bool external; u16 pfnum; - dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); - if (!dl_port) - return NULL; - mlx5_esw_get_port_parent_id(dev, &ppid); pfnum = mlx5_get_dev_index(dev); external = mlx5_core_is_ecpf_esw_manager(dev); if (external) controller_num = dev->priv.eswitch->offloads.host_number + 1; - if (vport_num == MLX5_VPORT_UPLINK) { - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pfnum; - memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); - attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_set(dl_port, &attrs); - } else if (vport_num == MLX5_VPORT_PF) { + if (vport_num == MLX5_VPORT_PF) { memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external); @@ -60,94 +49,86 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum, vport_num - 1, false); } - return dl_port; } -static void mlx5_esw_dl_port_free(struct devlink_port *dl_port) +int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - kfree(dl_port); -} - -static const struct devlink_port_ops mlx5_esw_dl_port_ops = { - .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, - .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, - .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, - .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, - .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, - .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, -}; - -int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) -{ - struct mlx5_core_dev *dev = esw->dev; - struct devlink_port *dl_port; - unsigned int dl_port_index; - struct mlx5_vport *vport; - struct devlink *devlink; - int err; + struct mlx5_devlink_port *dl_port; + u16 vport_num = vport->vport; if (!mlx5_esw_devlink_port_supported(esw, vport_num)) return 0; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); - - dl_port = mlx5_esw_dl_port_alloc(esw, vport_num); + dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); if (!dl_port) return -ENOMEM; - devlink = priv_to_devlink(dev); - dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devl_port_register_with_ops(devlink, dl_port, dl_port_index, - &mlx5_esw_dl_port_ops); - if (err) - goto reg_err; - - err = devl_rate_leaf_create(dl_port, vport, NULL); - if (err) - goto rate_err; + mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(esw, vport_num, + &dl_port->dl_port); vport->dl_port = dl_port; + mlx5_devlink_port_init(dl_port, vport); return 0; - -rate_err: - devl_port_unregister(dl_port); -reg_err: - mlx5_esw_dl_port_free(dl_port); - return err; } -void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport; - - if (!mlx5_esw_devlink_port_supported(esw, 
vport_num)) + if (!vport->dl_port) return; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return; + kfree(vport->dl_port); + vport->dl_port = NULL; +} - if (vport->dl_port->devlink_rate) { - mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); - devl_rate_leaf_destroy(vport->dl_port); - } +static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = { + .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, + .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, + .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, +#ifdef CONFIG_XFRM_OFFLOAD + .port_fn_ipsec_crypto_get = mlx5_devlink_port_fn_ipsec_crypto_get, + .port_fn_ipsec_crypto_set = mlx5_devlink_port_fn_ipsec_crypto_set, + .port_fn_ipsec_packet_get = mlx5_devlink_port_fn_ipsec_packet_get, + .port_fn_ipsec_packet_set = mlx5_devlink_port_fn_ipsec_packet_set, +#endif /* CONFIG_XFRM_OFFLOAD */ +}; - devl_port_unregister(vport->dl_port); - mlx5_esw_dl_port_free(vport->dl_port); - vport->dl_port = NULL; +static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw, + struct devlink_port *dl_port, + u32 controller, u32 sfnum) +{ + struct mlx5_core_dev *dev = esw->dev; + struct netdev_phys_item_id ppid = {}; + u16 pfnum; + + pfnum = mlx5_get_dev_index(dev); + mlx5_esw_get_port_parent_id(dev, &ppid); + memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); } -struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num) +int mlx5_esw_offloads_sf_devlink_port_init(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum) { - struct mlx5_vport *vport; + mlx5_esw_offloads_sf_devlink_port_attrs_set(esw, &dl_port->dl_port, controller, sfnum); - vport = mlx5_eswitch_get_vport(esw, vport_num); - return IS_ERR(vport) ? 
ERR_CAST(vport) : vport->dl_port; + vport->dl_port = dl_port; + mlx5_devlink_port_init(dl_port, vport); + return 0; +} + +void mlx5_esw_offloads_sf_devlink_port_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + vport->dl_port = NULL; } static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { @@ -164,58 +145,62 @@ static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { #endif }; -int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 controller, u32 sfnum) +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { struct mlx5_core_dev *dev = esw->dev; - struct netdev_phys_item_id ppid = {}; + const struct devlink_port_ops *ops; + struct mlx5_devlink_port *dl_port; + u16 vport_num = vport->vport; unsigned int dl_port_index; - struct mlx5_vport *vport; struct devlink *devlink; - u16 pfnum; int err; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); + dl_port = vport->dl_port; + if (!dl_port) + return 0; + + if (mlx5_esw_is_sf_vport(esw, vport_num)) + ops = &mlx5_esw_dl_sf_port_ops; + else if (mlx5_eswitch_is_pf_vf_vport(esw, vport_num)) + ops = &mlx5_esw_pf_vf_dl_port_ops; + else + ops = NULL; - pfnum = mlx5_get_dev_index(dev); - mlx5_esw_get_port_parent_id(dev, &ppid); - memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); - dl_port->attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devl_port_register_with_ops(devlink, dl_port, dl_port_index, - &mlx5_esw_dl_sf_port_ops); + err = devl_port_register_with_ops(devlink, &dl_port->dl_port, dl_port_index, ops); if (err) return err; - err = devl_rate_leaf_create(dl_port, vport, NULL); + err = devl_rate_leaf_create(&dl_port->dl_port, vport, NULL); if (err) goto rate_err; - vport->dl_port = dl_port; return 0; rate_err: - devl_port_unregister(dl_port); + devl_port_unregister(&dl_port->dl_port); return err; } -void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_vport *vport; + struct mlx5_devlink_port *dl_port; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) + if (!vport->dl_port) return; + dl_port = vport->dl_port; - if (vport->dl_port->devlink_rate) { - mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); - devl_rate_leaf_destroy(vport->dl_port); - } + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devl_rate_leaf_destroy(&dl_port->dl_port); - devl_port_unregister(vport->dl_port); - vport->dl_port = NULL; + devl_port_unregister(&dl_port->dl_port); +} + +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + return IS_ERR(vport) ? ERR_CAST(vport) : &vport->dl_port->dl_port; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c new file mode 100644 index 000000000000..da10e04777cf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c @@ -0,0 +1,369 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
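After this refactor the devlink-port paths are split into a pure-allocation phase (the _init()/_cleanup() helpers, which only fill or free vport->dl_port) and a registration phase (_register()/_unregister(), which talk to devlink and pick the ops table by vport type). A hedged sketch of the pairing a caller is expected to follow; the wrapper functions themselves are hypothetical:

	static int esw_vport_devlink_bringup(struct mlx5_eswitch *esw,
					     struct mlx5_vport *vport)
	{
		int err;

		err = mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport);
		if (err)
			return err;

		err = mlx5_esw_offloads_devlink_port_register(esw, vport);
		if (err)
			mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport);
		return err;
	}

	static void esw_vport_devlink_teardown(struct mlx5_eswitch *esw,
					       struct mlx5_vport *vport)
	{
		mlx5_esw_offloads_devlink_port_unregister(esw, vport);
		mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport);
	}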
+ +#include <linux/mlx5/device.h> +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" +#include "eswitch.h" + +static int esw_ipsec_vf_query_generic(struct mlx5_core_dev *dev, u16 vport_num, bool *result) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int err; + + if (!MLX5_CAP_GEN(dev, vhca_resource_manager)) + return -EOPNOTSUPP; + + if (!mlx5_esw_ipsec_vf_offload_supported(dev)) { + *result = false; + return 0; + } + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + err = mlx5_vport_get_other_func_general_cap(dev, vport_num, query_cap); + if (err) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + *result = MLX5_GET(cmd_hca_cap, hca_cap, ipsec_offload); +free: + kvfree(query_cap); + return err; +} + +enum esw_vport_ipsec_offload { + MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD, + MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD, +}; + +int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, struct mlx5_vport *vport) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + bool ipsec_enabled; + int err; + + /* Querying IPsec caps only makes sense when generic ipsec_offload + * HCA cap is enabled + */ + err = esw_ipsec_vf_query_generic(dev, vport->vport, &ipsec_enabled); + if (err) + return err; + + if (!ipsec_enabled) { + vport->info.ipsec_crypto_enabled = false; + vport->info.ipsec_packet_enabled = false; + return 0; + } + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(dev, vport->vport, query_cap, MLX5_CAP_IPSEC); + if (err) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + vport->info.ipsec_crypto_enabled = + MLX5_GET(ipsec_cap, hca_cap, ipsec_crypto_offload); + vport->info.ipsec_packet_enabled = + MLX5_GET(ipsec_cap, hca_cap, ipsec_full_offload); +free: + kvfree(query_cap); + return err; +} + +static int esw_ipsec_vf_set_generic(struct mlx5_core_dev *dev, u16 vport_num, bool ipsec_ofld) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap, *query_cap, *cap; + int ret; + + if (!MLX5_CAP_GEN(dev, vhca_resource_manager)) + return -EOPNOTSUPP; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto free; + } + + ret = mlx5_vport_get_other_func_general_cap(dev, vport_num, query_cap); + if (ret) + goto free; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(cmd_hca_cap, cap, ipsec_offload, ipsec_ofld); + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport_num); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +free: + kvfree(hca_cap); + kvfree(query_cap); + return ret; +} + +static int esw_ipsec_vf_set_bytype(struct mlx5_core_dev *dev, struct mlx5_vport *vport, + bool enable, enum esw_vport_ipsec_offload type) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap, *query_cap, *cap; + int ret; + + if (!MLX5_CAP_GEN(dev, vhca_resource_manager)) + return -EOPNOTSUPP; + + query_cap = 
kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto free; + } + + ret = mlx5_vport_get_other_func_cap(dev, vport->vport, query_cap, MLX5_CAP_IPSEC); + if (ret) + goto free; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + + switch (type) { + case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD: + MLX5_SET(ipsec_cap, cap, ipsec_crypto_offload, enable); + break; + case MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD: + MLX5_SET(ipsec_cap, cap, ipsec_full_offload, enable); + break; + default: + ret = -EOPNOTSUPP; + goto free; + } + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport->vport); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_IPSEC << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +free: + kvfree(hca_cap); + kvfree(query_cap); + return ret; +} + +static int esw_ipsec_vf_crypto_aux_caps_set(struct mlx5_core_dev *dev, u16 vport_num, bool enable) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + struct mlx5_eswitch *esw = dev->priv.eswitch; + void *hca_cap, *query_cap, *cap; + int ret; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto free; + } + + ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_ETHERNET_OFFLOADS); + if (ret) + goto free; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(per_protocol_networking_offload_caps, cap, insert_trailer, enable); + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport_num); + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_ETHERNET_OFFLOADS << 1); + ret = mlx5_cmd_exec_in(esw->dev, set_hca_cap, hca_cap); +free: + kvfree(hca_cap); + kvfree(query_cap); + return ret; +} + +static int esw_ipsec_vf_offload_set_bytype(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable, enum esw_vport_ipsec_offload type) +{ + struct mlx5_core_dev *dev = esw->dev; + int err; + + if (vport->vport == MLX5_VPORT_PF) + return -EOPNOTSUPP; + + if (type == MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD) { + err = esw_ipsec_vf_crypto_aux_caps_set(dev, vport->vport, enable); + if (err) + return err; + } + + if (enable) { + err = esw_ipsec_vf_set_generic(dev, vport->vport, enable); + if (err) + return err; + err = esw_ipsec_vf_set_bytype(dev, vport, enable, type); + if (err) + return err; + } else { + err = esw_ipsec_vf_set_bytype(dev, vport, enable, type); + if (err) + return err; + err = mlx5_esw_ipsec_vf_offload_get(dev, vport); + if (err) + return err; + + /* The generic ipsec_offload cap can be disabled only if both + * ipsec_crypto_offload and ipsec_full_offload aren't enabled. 
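esw_ipsec_vf_set_generic(), esw_ipsec_vf_set_bytype() and esw_ipsec_vf_crypto_aux_caps_set() all instantiate the same SET_HCA_CAP read-modify-write shape. Factored out (which the diff itself does not do), the skeleton looks roughly like this, with cap_type, op_mod, and the tweak callback as the only moving parts:

	/* Sketch only: a helper this diff does not actually introduce. */
	static int esw_set_other_func_cap(struct mlx5_core_dev *dev, u16 function_id,
					  u16 cap_type, u16 op_mod,
					  void (*tweak)(void *cap, bool en), bool en)
	{
		int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
		int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
		void *hca_cap, *query_cap, *cap;
		int ret;

		query_cap = kvzalloc(query_sz, GFP_KERNEL);
		hca_cap = kvzalloc(set_sz, GFP_KERNEL);
		if (!hca_cap || !query_cap) {
			ret = -ENOMEM;
			goto free;
		}

		/* 1. Read the target function's current caps of this type. */
		ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap, cap_type);
		if (ret)
			goto free;

		/* 2. Copy them wholesale, then flip only the field of interest. */
		cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
		memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		tweak(cap, en);

		/* 3. Write them back on the target function's behalf. */
		MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
		MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
		MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
		MLX5_SET(set_hca_cap_in, hca_cap, op_mod, op_mod << 1);
		ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
	free:
		kvfree(hca_cap);
		kvfree(query_cap);
		return ret;
	}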
+ */ + if (!vport->info.ipsec_crypto_enabled && + !vport->info.ipsec_packet_enabled) { + err = esw_ipsec_vf_set_generic(dev, vport->vport, enable); + if (err) + return err; + } + } + + switch (type) { + case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD: + vport->info.ipsec_crypto_enabled = enable; + break; + case MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD: + vport->info.ipsec_packet_enabled = enable; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_ipsec_offload_supported(struct mlx5_core_dev *dev, u16 vport_num) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int ret; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_GENERAL); + if (ret) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(cmd_hca_cap, hca_cap, log_max_dek)) + ret = -EOPNOTSUPP; +free: + kvfree(query_cap); + return ret; +} + +bool mlx5_esw_ipsec_vf_offload_supported(struct mlx5_core_dev *dev) +{ + /* Old firmware doesn't support ipsec_offload capability for VFs. This + * can be detected by checking reformat_add_esp_trasport capability - + * when this cap isn't supported it means firmware cannot be trusted + * about what it reports for ipsec_offload cap. + */ + return MLX5_CAP_FLOWTABLE_NIC_TX(dev, reformat_add_esp_trasport); +} + +int mlx5_esw_ipsec_vf_crypto_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int err; + + if (!mlx5_esw_ipsec_vf_offload_supported(dev)) + return -EOPNOTSUPP; + + err = esw_ipsec_offload_supported(dev, vport_num); + if (err) + return err; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(per_protocol_networking_offload_caps, hca_cap, swp)) + goto free; + +free: + kvfree(query_cap); + return err; +} + +int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int ret; + + if (!mlx5_esw_ipsec_vf_offload_supported(dev)) + return -EOPNOTSUPP; + + ret = esw_ipsec_offload_supported(dev, vport_num); + if (ret) + return ret; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_FLOW_TABLE); + if (ret) + goto out; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(flow_table_nic_cap, hca_cap, flow_table_properties_nic_receive.decap)) { + ret = -EOPNOTSUPP; + goto out; + } + +out: + kvfree(query_cap); + return ret; +} + +int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable) +{ + return esw_ipsec_vf_offload_set_bytype(esw, vport, enable, + MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD); +} + +int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable) +{ + return esw_ipsec_vf_offload_set_bytype(esw, vport, enable, + MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c new file mode 100644 index 
000000000000..095f31f380fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "fs_core.h" +#include "eswitch.h" +#include "en_accel/ipsec.h" +#include "esw/ipsec_fs.h" +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +#include "en/tc_priv.h" +#endif + +enum { + MLX5_ESW_IPSEC_RX_POL_FT_LEVEL, + MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL, + MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL, +}; + +enum { + MLX5_ESW_IPSEC_TX_POL_FT_LEVEL, + MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL, + MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL, +}; + +static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->status_drop.rule); + mlx5_destroy_flow_group(rx->status_drop.group); + mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); +} + +static void esw_ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->status.rule); + mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); +} + +static int esw_ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto err_out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop flow group, err=%d\n", err); + goto err_out; + } + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; + } + + rx->status_drop.group = g; + rx->status_drop.rule = rule; + rx->status_drop_cnt = flow_counter; + + kvfree(flow_group_in); + kvfree(spec); + return 0; + +err_rule: + mlx5_fc_destroy(mdev, flow_counter); +err_cnt: + mlx5_destroy_flow_group(g); +err_out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static int esw_ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, 
spec->match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_syndrome, 0); + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(ipsec->mdev, + "Failed to add ipsec rx status pass rule, err=%d\n", err); + goto err_rule; + } + + rx->status.rule = rule; + kvfree(spec); + return 0; + +err_rule: + kvfree(spec); + return err; +} + +void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + esw_ipsec_rx_status_pass_destroy(ipsec, rx); + esw_ipsec_rx_status_drop_destroy(ipsec, rx); +} + +int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + int err; + + err = esw_ipsec_rx_status_drop_create(ipsec, rx); + if (err) + return err; + + err = esw_ipsec_rx_status_pass_create(ipsec, rx, dest); + if (err) + goto err_pass_create; + + return 0; + +err_pass_create: + esw_ipsec_rx_status_drop_destroy(ipsec, rx); + return err; +} + +void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + attr->prio = FDB_CRYPTO_INGRESS; + attr->pol_level = MLX5_ESW_IPSEC_RX_POL_FT_LEVEL; + attr->sa_level = MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL; + attr->status_level = MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL; + attr->chains_ns = MLX5_FLOW_NAMESPACE_FDB; +} + +int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5_flow_destination *dest) +{ + dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest->ft = mlx5_chains_get_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); + + return 0; +} + +int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_act *flow_act) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_modify_hdr *modify_hdr; + u32 mapped_id; + int err; + + err = xa_alloc_bh(&ipsec->rx_esw->ipsec_obj_id_map, &mapped_id, + xa_mk_value(sa_entry->ipsec_obj_id), + XA_LIMIT(1, ESW_IPSEC_RX_MAPPED_ID_MASK), 0); + if (err) + return err; + + /* reuse tunnel bits for ipsec, + * tun_id is always 0 and tun_opts is mapped to ipsec_obj_id. 
+ */ + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(set_action_in, action, offset, ESW_ZONE_ID_BITS); + MLX5_SET(set_action_in, action, length, + ESW_TUN_ID_BITS + ESW_TUN_OPTS_BITS); + MLX5_SET(set_action_in, action, data, mapped_id); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_header_alloc; + } + + sa_entry->rx_mapped_id = mapped_id; + flow_act->modify_hdr = modify_hdr; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return 0; + +err_header_alloc: + xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, mapped_id); + return err; +} + +void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + + if (sa_entry->rx_mapped_id) + xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, + sa_entry->rx_mapped_id); +} + +int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, + u32 *ipsec_obj_id) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + void *val; + + val = xa_load(&ipsec->rx_esw->ipsec_obj_id_map, id); + if (!val) + return -ENOENT; + + *ipsec_obj_id = xa_to_value(val); + + return 0; +} + +void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx_create_attr *attr) +{ + attr->prio = FDB_CRYPTO_EGRESS; + attr->pol_level = MLX5_ESW_IPSEC_TX_POL_FT_LEVEL; + attr->sa_level = MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL; + attr->cnt_level = MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL; + attr->chains_ns = MLX5_FLOW_NAMESPACE_FDB; +} + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static int mlx5_esw_ipsec_modify_flow_dests(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + int err; + + attr = flow->attr; + esw_attr = attr->esw_attr; + if (esw_attr->out_count - esw_attr->split_count > 1) + return 0; + + err = mlx5_eswitch_restore_ipsec_rule(esw, flow->rule[0], esw_attr, + esw_attr->out_count - 1); + + return err; +} +#endif + +void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) +{ +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep; + struct mlx5e_rep_priv *rpriv; + struct rhashtable_iter iter; + struct mlx5e_tc_flow *flow; + unsigned long i; + int err; + + xa_for_each(&esw->offloads.vport_reps, i, rep) { + rpriv = rep->rep_data[REP_ETH].priv; + if (!rpriv || !rpriv->netdev) + continue; + + rhashtable_walk_enter(&rpriv->tc_ht, &iter); + rhashtable_walk_start(&iter); + while ((flow = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(flow)) + continue; + + err = mlx5_esw_ipsec_modify_flow_dests(esw, flow); + if (err) + mlx5_core_warn_once(mdev, + "Failed to modify flow dests for IPsec"); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + } +#endif +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h new file mode 100644 index 000000000000..0c90f7a8b0d3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_ESW_IPSEC_FS_H__ +#define __MLX5_ESW_IPSEC_FS_H__ + +struct mlx5e_ipsec; +struct mlx5e_ipsec_sa_entry; + +#ifdef CONFIG_MLX5_ESWITCH +void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx); +int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest); +void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr); +int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5_flow_destination *dest); +int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_act *flow_act); +void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry); +int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, + u32 *ipsec_obj_id); +void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx_create_attr *attr); +void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); +#else +static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) {} + +static inline int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + return -EINVAL; +} + +static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr) {} + +static inline int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5_flow_destination *dest) +{ + return -EINVAL; +} + +static inline int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_act *flow_act) +{ + return -EINVAL; +} + +static inline void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) {} + +static inline int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, + u32 *ipsec_obj_id) +{ + return -EINVAL; +} + +static inline void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx_create_attr *attr) {} + +static inline void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) {} +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_ESW_IPSEC_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 7c79476cc5f9..1887a24ee414 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -740,7 +740,7 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, u64 *rate, struct netlink_ext_ack *extack) { - u32 link_speed_max, reminder; + u32 link_speed_max, remainder; u64 value; int err; @@ -750,8 +750,8 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * return err; } - value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder); - if (reminder) { + value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder); + if (remainder) { pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", name, *rate); NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 243c455f1029..6cd7d6497e10 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -48,6 +48,7 @@ #include "devlink.h" #include "ecpf.h" #include "en/mod_hdr.h" +#include "en_accel/ipsec.h" enum { MLX5_ACTION_NONE = 0, @@ -77,18 +78,31 @@ static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) return 0; } -struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink) +static struct mlx5_eswitch *__mlx5_devlink_eswitch_get(struct devlink *devlink, bool check) { struct mlx5_core_dev *dev = devlink_priv(devlink); int err; - err = mlx5_eswitch_check(dev); - if (err) - return ERR_PTR(err); + if (check) { + err = mlx5_eswitch_check(dev); + if (err) + return ERR_PTR(err); + } return dev->priv.eswitch; } +struct mlx5_eswitch *__must_check +mlx5_devlink_eswitch_get(struct devlink *devlink) +{ + return __mlx5_devlink_eswitch_get(devlink, true); +} + +struct mlx5_eswitch *mlx5_devlink_eswitch_nocheck_get(struct devlink *devlink) +{ + return __mlx5_devlink_eswitch_get(devlink, false); +} + struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { @@ -818,6 +832,8 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable); + + err = mlx5_esw_ipsec_vf_offload_get(esw->dev, vport); out_free: kfree(query_ctx); return err; @@ -882,16 +898,12 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport esw_vport_cleanup_acl(esw, vport); } -int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, enum mlx5_eswitch_vport_event enabled_events) { - struct mlx5_vport *vport; + u16 vport_num = vport->vport; int ret; - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return PTR_ERR(vport); - mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -904,6 +916,9 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, /* Sync with current vport context */ vport->enabled_events = enabled_events; vport->enabled = true; + if (vport->vport != MLX5_VPORT_PF && + (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled)) + esw->enabled_ipsec_vf_count++; /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well * in smartNIC as it's a vport group manager. 
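The eswitch.c hunks around this point add per-vport IPsec accounting: mlx5_esw_vport_enable() bumps enabled_ipsec_vf_count for any non-PF vport that comes up with ipsec_crypto_enabled or ipsec_packet_enabled set, and mlx5_esw_vport_disable() drops it again, both under esw->state_lock. Together with dev->num_ipsec_offloads in mlx5_eswitch_block_ipsec()/mlx5_eswitch_unblock_ipsec() further down, this makes VF IPsec offload and PF-side IPsec state mutually exclusive. A minimal userspace sketch of that counting scheme — illustrative names only, not the driver's code:

    /* Toy model of the mutual exclusion kept under esw->state_lock:
     * PF IPsec offloads are refused while any VF has IPsec enabled,
     * and enabling VF IPsec is refused while PF offloads exist.
     * Build with: cc -pthread sketch.c */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static int enabled_ipsec_vf_count; /* VFs with crypto/packet offload on */
    static int num_ipsec_offloads;     /* live PF-side IPsec offloads */

    static bool block_ipsec(void)      /* models mlx5_eswitch_block_ipsec() */
    {
            bool ok;

            pthread_mutex_lock(&state_lock);
            ok = enabled_ipsec_vf_count == 0;
            if (ok)
                    num_ipsec_offloads++; /* undone by the unblock path */
            pthread_mutex_unlock(&state_lock);
            return ok;
    }

    static bool vf_ipsec_set(bool enable) /* models the devlink set path */
    {
            bool ok;

            pthread_mutex_lock(&state_lock);
            ok = !(enable && num_ipsec_offloads); /* kernel returns -EBUSY here */
            if (ok)
                    enabled_ipsec_vf_count += enable ? 1 : -1;
            pthread_mutex_unlock(&state_lock);
            return ok;
    }

    int main(void)
    {
            printf("PF SA add: %s\n", block_ipsec() ? "ok" : "busy");
            printf("VF enable: %s\n", vf_ipsec_set(true) ? "ok" : "busy");
            return 0;
    }

The second call prints "busy": once a PF-side offload is live, the first attempt to enable IPsec on a VF is rejected, mirroring the -EBUSY check in the devlink handlers.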
@@ -912,7 +927,7 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); if (ret) @@ -939,15 +954,12 @@ err_vhca_mapping: return ret; } -void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_vport *vport; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return; + u16 vport_num = vport->vport; mutex_lock(&esw->state_lock); + if (!vport->enabled) goto done; @@ -957,12 +969,16 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) /* Disable events from this vport */ if (MLX5_CAP_GEN(esw->dev, log_max_l2_table)) - arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + arm_vport_context_events_cmd(esw->dev, vport_num, 0); - if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) mlx5_esw_vport_vhca_id_clear(esw, vport_num); + if (vport->vport != MLX5_VPORT_PF && + (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled)) + esw->enabled_ipsec_vf_count--; + /* We don't assume VFs will cleanup after themselves. * Calling vport change handler while vport is disabled will cleanup * the vport resources. @@ -1068,31 +1084,104 @@ static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) } } -/* Public E-Switch API */ -int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, - enum mlx5_eswitch_vport_event enabled_events) +static int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + enum mlx5_eswitch_vport_event enabled_events) { int err; - err = mlx5_esw_vport_enable(esw, vport_num, enabled_events); + err = mlx5_esw_vport_enable(esw, vport, enabled_events); if (err) return err; - err = esw_offloads_load_rep(esw, vport_num); + err = mlx5_esw_offloads_load_rep(esw, vport); if (err) goto err_rep; return err; err_rep: - mlx5_esw_vport_disable(esw, vport_num); + mlx5_esw_vport_disable(esw, vport); + return err; +} + +static void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + mlx5_esw_offloads_unload_rep(esw, vport); + mlx5_esw_vport_disable(esw, vport); +} + +static int mlx5_eswitch_load_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + err = mlx5_esw_offloads_init_pf_vf_rep(esw, vport); + if (err) + return err; + + err = mlx5_eswitch_load_vport(esw, vport, enabled_events); + if (err) + goto err_load; + return 0; + +err_load: + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport); + return err; +} + +static void mlx5_eswitch_unload_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + mlx5_eswitch_unload_vport(esw, vport); + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport); +} + +int mlx5_eswitch_load_sf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events, + struct mlx5_devlink_port *dl_port, u32 controller, u32 sfnum) +{ 
+ struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + err = mlx5_esw_offloads_init_sf_rep(esw, vport, dl_port, controller, sfnum); + if (err) + return err; + + err = mlx5_eswitch_load_vport(esw, vport, enabled_events); + if (err) + goto err_load; + + return 0; + +err_load: + mlx5_esw_offloads_cleanup_sf_rep(esw, vport); return err; } -void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_eswitch_unload_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) { - esw_offloads_unload_rep(esw, vport_num); - mlx5_esw_vport_disable(esw, vport_num); + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + mlx5_eswitch_unload_vport(esw, vport); + mlx5_esw_offloads_cleanup_sf_rep(esw, vport); } void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) @@ -1103,7 +1192,7 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { if (!vport->enabled) continue; - mlx5_eswitch_unload_vport(esw, vport->vport); + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); } } @@ -1116,7 +1205,7 @@ static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { if (!vport->enabled) continue; - mlx5_eswitch_unload_vport(esw, vport->vport); + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); } } @@ -1128,7 +1217,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, int err; mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { - err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events); if (err) goto vf_err; } @@ -1148,7 +1237,7 @@ static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_v int err; mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { - err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events); if (err) goto vf_err; } @@ -1190,7 +1279,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, int ret; /* Enable PF vport */ - ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events); + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events); if (ret) return ret; @@ -1201,7 +1290,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { - ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; if (mlx5_core_ec_sriov_enabled(esw->dev)) { @@ -1224,11 +1313,11 @@ vf_err: mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); ecpf_err: host_pf_disable_hca(esw->dev); pf_hca_err: - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1242,11 +1331,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) if (mlx5_ecpf_vport_exists(esw->dev)) { if (mlx5_core_ec_sriov_enabled(esw->dev)) mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + 
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } host_pf_disable_hca(esw->dev); - mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); } static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) @@ -1919,6 +2008,12 @@ bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF); } +bool mlx5_eswitch_is_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(esw, vport_num); +} + bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) { return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF); @@ -2251,3 +2346,34 @@ struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) return mlx5_esw_allowed(esw) ? esw->dev : NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_core_dev); + +bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + return true; + + mutex_lock(&esw->state_lock); + if (esw->enabled_ipsec_vf_count) { + mutex_unlock(&esw->state_lock); + return false; + } + + dev->num_ipsec_offloads++; + mutex_unlock(&esw->state_lock); + return true; +} + +void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + /* Failure means no eswitch => core dev is not a PF */ + return; + + mutex_lock(&esw->state_lock); + dev->num_ipsec_offloads--; + mutex_unlock(&esw->state_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ae0dc8a3060d..37ab66e7b403 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -163,6 +163,8 @@ struct mlx5_vport_info { u8 trusted: 1; u8 roce_enabled: 1; u8 mig_enabled: 1; + u8 ipsec_crypto_enabled: 1; + u8 ipsec_packet_enabled: 1; }; /* Vport context events */ @@ -172,6 +174,29 @@ enum mlx5_eswitch_vport_event { MLX5_VPORT_PROMISC_CHANGE = BIT(3), }; +struct mlx5_vport; + +struct mlx5_devlink_port { + struct devlink_port dl_port; + struct mlx5_vport *vport; +}; + +static inline void mlx5_devlink_port_init(struct mlx5_devlink_port *dl_port, + struct mlx5_vport *vport) +{ + dl_port->vport = vport; +} + +static inline struct mlx5_devlink_port *mlx5_devlink_port_get(struct devlink_port *dl_port) +{ + return container_of(dl_port, struct mlx5_devlink_port, dl_port); +} + +static inline struct mlx5_vport *mlx5_devlink_port_vport_get(struct devlink_port *dl_port) +{ + return mlx5_devlink_port_get(dl_port)->vport; +} + struct mlx5_vport { struct mlx5_core_dev *dev; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; @@ -200,7 +225,7 @@ struct mlx5_vport { bool enabled; enum mlx5_eswitch_vport_event enabled_events; int index; - struct devlink_port *dl_port; + struct mlx5_devlink_port *dl_port; }; struct mlx5_esw_indir_table; @@ -254,6 +279,7 @@ struct mlx5_esw_offload { struct mlx5_flow_group *vport_rx_group; struct mlx5_flow_group *vport_rx_drop_group; struct mlx5_flow_handle *vport_rx_drop_rule; + struct mlx5_flow_table *ft_ipsec_tx_pol; struct xarray vport_reps; struct list_head peer_flows[MLX5_MAX_PORTS]; struct mutex peer_mutex; @@ -269,6 +295,7 @@ struct mlx5_esw_offload { u8 inline_mode; atomic64_t num_flows; u64 num_block_encap; + u64 num_block_mode; enum devlink_eswitch_encap_mode encap; struct ida vport_metadata_ida; unsigned int 
host_number; /* ECPF supports one external host */ @@ -354,6 +381,8 @@ struct mlx5_eswitch { } params; struct blocking_notifier_head n_head; struct xarray paired; + struct mlx5_devcom_comp_dev *devcom; + u16 enabled_ipsec_vf_count; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -381,8 +410,9 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); void mlx5_eswitch_disable(struct mlx5_eswitch *esw); -void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw); +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key); void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw); +bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, const u8 *mac); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -531,6 +561,16 @@ int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enab struct netlink_ext_ack *extack); int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, struct netlink_ext_ack *extack); +#ifdef CONFIG_XFRM_OFFLOAD +int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); +#endif /* CONFIG_XFRM_OFFLOAD */ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, @@ -671,11 +711,16 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base) +\ (last) - 1) -struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); +struct mlx5_eswitch *__must_check +mlx5_devlink_eswitch_get(struct devlink *devlink); + +struct mlx5_eswitch *mlx5_devlink_eswitch_nocheck_get(struct devlink *devlink); + struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num); bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); @@ -685,9 +730,9 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); -int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, enum mlx5_eswitch_vport_event enabled_events); -void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, @@ -725,31 +770,40 @@ void mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_spec *spec); -int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); -void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 
vport_num); +int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_offloads_init_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum); +void mlx5_esw_offloads_cleanup_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, - enum mlx5_eswitch_vport_event enabled_events); -void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_eswitch_load_sf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events, + struct mlx5_devlink_port *dl_port, u32 controller, u32 sfnum); +void mlx5_eswitch_unload_sf_vport(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); -int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); -struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); + +int mlx5_esw_offloads_sf_devlink_port_init(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum); +void mlx5_esw_offloads_sf_devlink_port_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 controller, u32 sfnum); -void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); -int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 controller, u32 sfnum); -void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id); int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); @@ -788,6 +842,9 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev); void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev); +int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev); +void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev); + static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) { if (mlx5_esw_allowed(esw)) @@ -809,6 +866,24 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw) 
return esw->fdb_table.offloads.slow_fdb; } +int mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *esw_attr, int attr_idx); +bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev); +void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev); +bool mlx5_esw_ipsec_vf_offload_supported(struct mlx5_core_dev *dev); +int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, + struct mlx5_vport *vport); +int mlx5_esw_ipsec_vf_crypto_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num); +int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable); +int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable); +int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num); +void mlx5_esw_vport_ipsec_offload_enable(struct mlx5_eswitch *esw); +void mlx5_esw_vport_ipsec_offload_disable(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -816,8 +891,9 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} -static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {} +static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) {} static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {} +static inline bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw) { return false; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } @@ -866,6 +942,15 @@ static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev) { } + +static inline int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) {} +static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev) +{ + return false; +} + +static inline void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bdfe609cc9ec..752fb0dfb111 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -375,7 +375,6 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - bool ignore_flow_lvl, int *i) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; @@ -385,8 +384,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, return -EOPNOTSUPP; for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { - if (ignore_flow_lvl) - flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[*i].ft = 
mlx5_esw_indir_table_get(esw, attr, @@ -424,10 +422,51 @@ esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 l mlx5_chains_put_table(chains, chain, prio, level); } +static bool esw_same_vhca_id(struct mlx5_core_dev *mdev1, struct mlx5_core_dev *mdev2) +{ + return MLX5_CAP_GEN(mdev1, vhca_id) == MLX5_CAP_GEN(mdev2, vhca_id); +} + +static bool esw_setup_uplink_fwd_ipsec_needed(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *esw_attr, + int attr_idx) +{ + if (esw->offloads.ft_ipsec_tx_pol && + esw_attr->dests[attr_idx].rep && + esw_attr->dests[attr_idx].rep->vport == MLX5_VPORT_UPLINK && + /* To be aligned with software, encryption is needed only for tunnel device */ + (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) && + esw_attr->dests[attr_idx].rep != esw_attr->in_rep && + esw_same_vhca_id(esw_attr->dests[attr_idx].mdev, esw->dev)) + return true; + + return false; +} + +static bool esw_flow_dests_fwd_ipsec_check(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *esw_attr) +{ + int i; + + if (!esw->offloads.ft_ipsec_tx_pol) + return true; + + for (i = 0; i < esw_attr->split_count; i++) + if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i)) + return false; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i) && + (esw_attr->out_count - esw_attr->split_count > 1)) + return false; + + return true; +} + static void -esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, - struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, - int attr_idx, int dest_idx, bool pkt_reformat) +esw_setup_dest_fwd_vport(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; @@ -449,6 +488,33 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f } } +static void +esw_setup_dest_fwd_ipsec(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + dest[dest_idx].ft = esw->offloads.ft_ipsec_tx_pol; + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (pkt_reformat && + esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } +} + +static void +esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx)) + esw_setup_dest_fwd_ipsec(dest, flow_act, esw, esw_attr, + attr_idx, dest_idx, pkt_reformat); + else + esw_setup_dest_fwd_vport(dest, flow_act, esw, esw_attr, + attr_idx, dest_idx, pkt_reformat); +} + static int esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, @@ -469,6 +535,28 @@ esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level); } +static bool +esw_dests_to_vf_pf_vports(struct mlx5_flow_destination *dests, int max_dest) +{ + 
bool vf_dest = false, pf_dest = false; + int i; + + for (i = 0; i < max_dest; i++) { + if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + if (dests[i].vport.num == MLX5_VPORT_UPLINK) + pf_dest = true; + else + vf_dest = true; + + if (vf_dest && pf_dest) + return true; + } + + return false; +} + static int esw_setup_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@ -501,7 +589,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest, err = esw_setup_mtu_dest(dest, &attr->meter_attr, *i); (*i)++; } else if (esw_is_indir_table(esw, attr)) { - err = esw_setup_indir_table(dest, flow_act, esw, attr, true, i); + err = esw_setup_indir_table(dest, flow_act, esw, attr, i); } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); } else { @@ -575,6 +663,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) return ERR_PTR(-EOPNOTSUPP); + if (!esw_flow_dests_fwd_ipsec_check(esw, esw_attr)) + return ERR_PTR(-EOPNOTSUPP); + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); if (!dest) return ERR_PTR(-ENOMEM); @@ -602,6 +693,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, rule = ERR_PTR(err); goto err_create_goto_table; } + + /* Header rewrite with combined wire+loopback in FDB is not allowed */ + if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) && + esw_dests_to_vf_pf_vports(dest, i)) { + esw_warn(esw->dev, + "FDB: Header rewrite with forwarding to both PF and VF is not allowed\n"); + rule = ERR_PTR(-EINVAL); + goto err_esw_get; + } } if (esw_attr->decap_pkt_reformat) @@ -884,6 +984,17 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) { + dest.ft = on_esw->offloads.ft_ipsec_tx_pol; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + } else { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = rep->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + } + if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) && rep->vport == MLX5_VPORT_UPLINK) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; @@ -1436,7 +1547,6 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) esw_init_chains_offload_flags(esw, &attr.flags); attr.ns = MLX5_FLOW_NAMESPACE_FDB; - attr.fs_base_prio = FDB_TC_OFFLOAD; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; attr.mapping = esw->offloads.reg_c0_obj_pool; @@ -2391,7 +2501,7 @@ static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) __esw_offloads_unload_rep(esw, rep, rep_type); } -int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) +static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -2415,7 +2525,7 @@ err_reps: return err; } -void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) +static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -2425,39 +2535,63 @@ void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) 
__esw_offloads_unload_rep(esw, rep, rep_type); } -int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) +int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + return mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport); +} + +void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport); +} + +int mlx5_esw_offloads_init_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum) +{ + return mlx5_esw_offloads_sf_devlink_port_init(esw, vport, dl_port, controller, sfnum); +} + +void mlx5_esw_offloads_cleanup_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + mlx5_esw_offloads_sf_devlink_port_cleanup(esw, vport); +} + +int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int err; if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; - if (vport_num != MLX5_VPORT_UPLINK) { - err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); - if (err) - return err; - } + err = mlx5_esw_offloads_devlink_port_register(esw, vport); + if (err) + return err; - err = mlx5_esw_offloads_rep_load(esw, vport_num); + err = mlx5_esw_offloads_rep_load(esw, vport->vport); if (err) goto load_err; return err; load_err: - if (vport_num != MLX5_VPORT_UPLINK) - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + mlx5_esw_offloads_devlink_port_unregister(esw, vport); return err; } -void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; - mlx5_esw_offloads_rep_unload(esw, vport_num); + mlx5_esw_offloads_rep_unload(esw, vport->vport); - if (vport_num != MLX5_VPORT_UPLINK) - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + mlx5_esw_offloads_devlink_port_unregister(esw, vport); } static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, @@ -2779,9 +2913,9 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw, bool pair) { - u8 peer_idx = mlx5_get_dev_index(peer_esw->dev); + u16 peer_vhca_id = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + u16 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); struct mlx5_flow_root_namespace *peer_ns; - u8 idx = mlx5_get_dev_index(esw->dev); struct mlx5_flow_root_namespace *ns; int err; @@ -2789,18 +2923,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, ns = esw->dev->priv.steering->fdb_root_ns; if (pair) { - err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_idx); + err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_vhca_id); if (err) return err; - err = mlx5_flow_namespace_set_peer(peer_ns, ns, idx); + err = mlx5_flow_namespace_set_peer(peer_ns, ns, vhca_id); if (err) { - mlx5_flow_namespace_set_peer(ns, NULL, peer_idx); + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); return err; } } else { - mlx5_flow_namespace_set_peer(ns, NULL, peer_idx); - mlx5_flow_namespace_set_peer(peer_ns, NULL, idx); + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); + mlx5_flow_namespace_set_peer(peer_ns, NULL, vhca_id); } return 0; @@ -2811,7 +2945,6 @@ static int mlx5_esw_offloads_devcom_event(int event, void *event_data) { struct mlx5_eswitch *esw = my_data; - struct mlx5_devcom *devcom = 
esw->dev->priv.devcom; struct mlx5_eswitch *peer_esw = event_data; u16 esw_i, peer_esw_i; bool esw_paired; @@ -2833,6 +2966,7 @@ static int mlx5_esw_offloads_devcom_event(int event, err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); if (err) goto err_out; + err = mlx5_esw_offloads_pair(esw, peer_esw); if (err) goto err_peer; @@ -2851,7 +2985,7 @@ static int mlx5_esw_offloads_devcom_event(int event, esw->num_peers++; peer_esw->num_peers++; - mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + mlx5_devcom_comp_set_ready(esw->devcom, true); break; case ESW_OFFLOADS_DEVCOM_UNPAIR: @@ -2861,7 +2995,7 @@ static int mlx5_esw_offloads_devcom_event(int event, peer_esw->num_peers--; esw->num_peers--; if (!esw->num_peers && !peer_esw->num_peers) - mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + mlx5_devcom_comp_set_ready(esw->devcom, false); xa_erase(&peer_esw->paired, esw_i); xa_erase(&esw->paired, peer_esw_i); mlx5_esw_offloads_unpair(peer_esw, esw); @@ -2886,9 +3020,8 @@ err_out: return err; } -void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) { - struct mlx5_devcom *devcom = esw->dev->priv.devcom; int i; for (i = 0; i < MLX5_MAX_PORTS; i++) @@ -2898,38 +3031,44 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; - if (!mlx5_lag_is_supported(esw->dev)) + if ((MLX5_VPORT_MANAGER(esw->dev) || mlx5_core_is_ecpf_esw_manager(esw->dev)) && + !mlx5_lag_is_supported(esw->dev)) return; xa_init(&esw->paired); - mlx5_devcom_register_component(devcom, - MLX5_DEVCOM_ESW_OFFLOADS, - mlx5_esw_offloads_devcom_event, - esw); - esw->num_peers = 0; - mlx5_devcom_send_event(devcom, - MLX5_DEVCOM_ESW_OFFLOADS, + esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc, + MLX5_DEVCOM_ESW_OFFLOADS, + key, + mlx5_esw_offloads_devcom_event, + esw); + if (IS_ERR_OR_NULL(esw->devcom)) + return; + + mlx5_devcom_send_event(esw->devcom, ESW_OFFLOADS_DEVCOM_PAIR, - ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + ESW_OFFLOADS_DEVCOM_UNPAIR, + esw); } void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) { - struct mlx5_devcom *devcom = esw->dev->priv.devcom; - - if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) - return; - - if (!mlx5_lag_is_supported(esw->dev)) + if (IS_ERR_OR_NULL(esw->devcom)) return; - mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + mlx5_devcom_send_event(esw->devcom, ESW_OFFLOADS_DEVCOM_UNPAIR, - ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + ESW_OFFLOADS_DEVCOM_UNPAIR, + esw); - mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_unregister_component(esw->devcom); xa_destroy(&esw->paired); + esw->devcom = NULL; +} + +bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw) +{ + return mlx5_devcom_comp_is_ready(esw->devcom); } bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) @@ -3356,7 +3495,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; /* Uplink vport rep must load first. 
*/ - err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); + err = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK); if (err) goto err_uplink; @@ -3367,7 +3506,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) return 0; err_vports: - esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); + mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); err_uplink: esw_offloads_steering_cleanup(esw); err_steering_init: @@ -3405,7 +3544,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_disable(struct mlx5_eswitch *esw) { mlx5_eswitch_disable_pf_vf_vports(esw); - esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); + mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); mapping_destroy(esw->offloads.reg_c0_obj_pool); @@ -3495,13 +3634,43 @@ static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) struct net *devl_net, *netdev_net; struct mlx5_eswitch *esw; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_nocheck_get(devlink); netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); devl_net = devlink_net(devlink); return net_eq(devl_net, netdev_net); } +int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + if (!mlx5_esw_allowed(esw)) + return 0; + + /* Take TC into account */ + err = mlx5_esw_try_lock(esw); + if (err < 0) + return err; + + esw->offloads.num_block_mode++; + mlx5_esw_unlock(esw); + return 0; +} + +void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + return; + + down_write(&esw->mode_lock); + esw->offloads.num_block_mode--; + up_write(&esw->mode_lock); +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -3535,6 +3704,13 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (cur_mlx5_mode == mlx5_mode) goto unlock; + if (esw->offloads.num_block_mode) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change eswitch mode when IPsec SA and/or policies are configured"); + err = -EOPNOTSUPP; + goto unlock; + } + mlx5_eswitch_disable_locked(esw); if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { @@ -3694,38 +3870,28 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); - struct mlx5_eswitch *esw; + struct mlx5_eswitch *esw = dev->priv.eswitch; - devl_lock(devlink); - esw = mlx5_devlink_eswitch_get(devlink); - if (IS_ERR(esw)) { - devl_unlock(devlink); - /* Failure means no eswitch => not possible to change encap */ + if (!mlx5_esw_allowed(esw)) return true; - } down_write(&esw->mode_lock); if (esw->mode != MLX5_ESWITCH_LEGACY && esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { up_write(&esw->mode_lock); - devl_unlock(devlink); return false; } esw->offloads.num_block_encap++; up_write(&esw->mode_lock); - devl_unlock(devlink); return true; } void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); - struct mlx5_eswitch *esw; + struct mlx5_eswitch *esw = dev->priv.eswitch; - esw = mlx5_devlink_eswitch_get(devlink); - if (IS_ERR(esw)) + if (!mlx5_esw_allowed(esw)) return; down_write(&esw->mode_lock); @@ -3921,38 +4087,6 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, } 
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); -int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 controller, u32 sfnum) -{ - int err; - - err = mlx5_esw_vport_enable(esw, vport_num, MLX5_VPORT_UC_ADDR_CHANGE); - if (err) - return err; - - err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum); - if (err) - goto devlink_err; - - err = mlx5_esw_offloads_rep_load(esw, vport_num); - if (err) - goto rep_err; - return 0; - -rep_err: - mlx5_esw_devlink_sf_port_unregister(esw, vport_num); -devlink_err: - mlx5_esw_vport_disable(esw, vport_num); - return err; -} - -void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) -{ - mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_devlink_sf_port_unregister(esw, vport_num); - mlx5_esw_vport_disable(esw, vport_num); -} - static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) { int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); @@ -4041,35 +4175,12 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set); -static bool -is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) -{ - return vport_num == MLX5_VPORT_PF || - mlx5_eswitch_is_vf_vport(esw, vport_num) || - mlx5_esw_is_sf_vport(esw, vport_num); -} - int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - u16 vport_num; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); - - vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); - if (!is_port_function_supported(esw, vport_num)) - return -EOPNOTSUPP; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); - } + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); mutex_lock(&esw->state_lock); ether_addr_copy(hw_addr, vport->info.mac); @@ -4082,100 +4193,55 @@ int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw; - u16 vport_num; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) { - NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); - return PTR_ERR(esw); - } - - vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); - if (!is_port_function_supported(esw, vport_num)) { - NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr"); - return -EINVAL; - } + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); - return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr); -} - -static struct mlx5_vport * -mlx5_devlink_port_fn_get_vport(struct devlink_port *port, struct mlx5_eswitch *esw) -{ - u16 vport_num; - - if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) - return ERR_PTR(-EOPNOTSUPP); - - vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); - if (!is_port_function_supported(esw, vport_num)) - return ERR_PTR(-EOPNOTSUPP); - - return mlx5_eswitch_get_vport(esw, vport_num); + return mlx5_eswitch_set_vport_mac(esw, vport->vport, hw_addr); } int 
mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int err = -EOPNOTSUPP; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); if (!MLX5_CAP_GEN(esw->dev, migration)) { NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration"); - return err; + return -EOPNOTSUPP; } - vport = mlx5_devlink_port_fn_get_vport(port, esw); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; } mutex_lock(&esw->state_lock); - if (vport->enabled) { - *is_enabled = vport->info.mig_enabled; - err = 0; - } + *is_enabled = vport->info.mig_enabled; mutex_unlock(&esw->state_lock); - return err; + return 0; } int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; void *query_ctx; void *hca_caps; - int err = -EOPNOTSUPP; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); + int err; if (!MLX5_CAP_GEN(esw->dev, migration)) { NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration"); - return err; + return -EOPNOTSUPP; } - vport = mlx5_devlink_port_fn_get_vport(port, esw); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; } mutex_lock(&esw->state_lock); - if (!vport->enabled) { - NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); - goto out; - } if (vport->info.mig_enabled == enable) { err = 0; @@ -4196,7 +4262,7 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); + MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); @@ -4217,56 +4283,37 @@ out: int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int err = -EOPNOTSUPP; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); - vport = mlx5_devlink_port_fn_get_vport(port, esw); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; } mutex_lock(&esw->state_lock); - if (vport->enabled) { - *is_enabled = vport->info.roce_enabled; - err = 0; - } + *is_enabled = vport->info.roce_enabled; 
mutex_unlock(&esw->state_lock); - return err; + return 0; } int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int err = -EOPNOTSUPP; + u16 vport_num = vport->vport; void *query_ctx; void *hca_caps; - u16 vport_num; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); + int err; - vport = mlx5_devlink_port_fn_get_vport(port, esw); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; } - vport_num = vport->vport; mutex_lock(&esw->state_lock); - if (!vport->enabled) { - NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); - goto out; - } if (vport->info.roce_enabled == enable) { err = 0; @@ -4304,3 +4351,188 @@ out: mutex_unlock(&esw->state_lock); return err; } + +int +mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *esw_attr, int attr_idx) +{ + struct mlx5_flow_destination new_dest = {}; + struct mlx5_flow_destination old_dest = {}; + + if (!esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx)) + return 0; + + esw_setup_dest_fwd_ipsec(&old_dest, NULL, esw, esw_attr, attr_idx, 0, false); + esw_setup_dest_fwd_vport(&new_dest, NULL, esw, esw_attr, attr_idx, 0, false); + + return mlx5_modify_rule_destination(rule, &new_dest, &old_dest); +} + +#ifdef CONFIG_XFRM_OFFLOAD +int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + int err = 0; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPSec crypto"); + return -EOPNOTSUPP; + } + + vport = mlx5_devlink_port_vport_get(port); + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + goto unlock; + } + + *is_enabled = vport->info.ipsec_crypto_enabled; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + u16 vport_num; + int err; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + err = mlx5_esw_ipsec_vf_crypto_offload_supported(esw->dev, vport_num); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support IPsec crypto"); + return err; + } + + vport = mlx5_devlink_port_vport_get(port); + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); + goto unlock; + } + + if (vport->info.ipsec_crypto_enabled == enable) + goto unlock; + + if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) { + err = -EBUSY; + goto unlock; + } + + err = mlx5_esw_ipsec_vf_crypto_offload_set(esw, vport, enable); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set 
IPsec crypto"); + goto unlock; + } + + vport->info.ipsec_crypto_enabled = enable; + if (enable) + esw->enabled_ipsec_vf_count++; + else + esw->enabled_ipsec_vf_count--; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + int err = 0; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet"); + return -EOPNOTSUPP; + } + + vport = mlx5_devlink_port_vport_get(port); + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + goto unlock; + } + + *is_enabled = vport->info.ipsec_packet_enabled; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, + bool enable, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + u16 vport_num; + int err; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + err = mlx5_esw_ipsec_vf_packet_offload_supported(esw->dev, vport_num); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support IPsec packet mode"); + return err; + } + + vport = mlx5_devlink_port_vport_get(port); + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); + goto unlock; + } + + if (vport->info.ipsec_packet_enabled == enable) + goto unlock; + + if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) { + err = -EBUSY; + goto unlock; + } + + err = mlx5_esw_ipsec_vf_packet_offload_set(esw, vport, enable); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set IPsec packet mode"); + goto unlock; + } + + vport->info.ipsec_packet_enabled = enable; + if (enable) + esw->enabled_ipsec_vf_count++; + else + esw->enabled_ipsec_vf_count--; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} +#endif /* CONFIG_XFRM_OFFLOAD */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 12abe991583a..c4de6bf8d1b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -445,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) goto err_cqwq; } - err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn); + err = mlx5_comp_eqn_get(mdev, smp_processor_id(), &eqn); if (err) { kvfree(in); goto err_cqwq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 39c03dcbd196..e5c1012921d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -57,7 +57,7 @@ static const char * const mlx5_fpga_qp_error_strings[] = { }; static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void) { - struct mlx5_fpga_device *fdev = NULL; + struct mlx5_fpga_device *fdev; fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); if (!fdev) @@ -252,7 +252,7 @@ out: int mlx5_fpga_init(struct mlx5_core_dev *mdev) { - struct mlx5_fpga_device *fdev = NULL; + struct mlx5_fpga_device *fdev; if (!MLX5_CAP_GEN(mdev, fpga)) { mlx5_core_dbg(mdev, "FPGA 
capability not present\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 91dcb0dcad10..a4b925331661 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -140,7 +140,7 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx) + u16 peer_vhca_id) { return 0; } @@ -245,12 +245,20 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, mlx5_lag_is_shared_fdb(dev) && mlx5_lag_is_master(dev)) { struct mlx5_core_dev *peer_dev; - int i; + int i, j; mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect, (!disconnect) ? ft->id : 0); if (err && !disconnect) { + mlx5_lag_for_each_peer_mdev(dev, peer_dev, j) { + if (j < i) + mlx5_cmd_set_slave_root_fdb(dev, peer_dev, 1, + ns->root_ft->id); + else + break; + } + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); MLX5_SET(set_flow_table_root_in, in, table_id, ns->root_ft->id); @@ -967,6 +975,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); table_type = FS_FT_ESW_INGRESS_ACL; break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC: case MLX5_FLOW_NAMESPACE_RDMA_TX: max_actions = MLX5_CAP_FLOWTABLE_RDMA_TX(dev, max_modify_header_actions); table_type = FS_FT_RDMA_TX; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index b6b9a5a20591..7790ae5531e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -94,7 +94,7 @@ struct mlx5_flow_cmds { int (*set_peer)(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx); + u16 peer_vhca_id); int (*create_ns)(struct mlx5_flow_root_namespace *ns); int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4ef04aa28771..a13b9c2bd144 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -107,7 +107,7 @@ LEFTOVERS_NUM_PRIOS) #define KERNEL_RX_MACSEC_NUM_PRIOS 1 -#define KERNEL_RX_MACSEC_NUM_LEVELS 2 +#define KERNEL_RX_MACSEC_NUM_LEVELS 3 #define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS) #define ETHTOOL_PRIO_NUM_LEVELS 1 @@ -224,6 +224,7 @@ static struct init_tree_node egress_root_fs = { enum { RDMA_RX_IPSEC_PRIO, + RDMA_RX_MACSEC_PRIO, RDMA_RX_COUNTERS_PRIO, RDMA_RX_BYPASS_PRIO, RDMA_RX_KERNEL_PRIO, @@ -237,9 +238,13 @@ enum { #define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1) #define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2) +#define RDMA_RX_MACSEC_NUM_PRIOS 1 +#define RDMA_RX_MACSEC_PRIO_NUM_LEVELS 2 +#define RDMA_RX_MACSEC_MIN_LEVEL (RDMA_RX_COUNTERS_MIN_LEVEL + RDMA_RX_MACSEC_NUM_PRIOS) + static struct init_tree_node rdma_rx_root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 4, + .ar_size = 5, .children = (struct init_tree_node[]) { [RDMA_RX_IPSEC_PRIO] = ADD_PRIO(0, RDMA_RX_IPSEC_MIN_LEVEL, 0, @@ -247,6 +252,12 @@ static struct init_tree_node rdma_rx_root_fs = { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(RDMA_RX_IPSEC_NUM_PRIOS, 
RDMA_RX_IPSEC_NUM_LEVELS))), + [RDMA_RX_MACSEC_PRIO] = + ADD_PRIO(0, RDMA_RX_MACSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_RX_MACSEC_NUM_PRIOS, + RDMA_RX_MACSEC_PRIO_NUM_LEVELS))), [RDMA_RX_COUNTERS_PRIO] = ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0, FS_CHAINING_CAPS, @@ -270,6 +281,7 @@ static struct init_tree_node rdma_rx_root_fs = { enum { RDMA_TX_COUNTERS_PRIO, RDMA_TX_IPSEC_PRIO, + RDMA_TX_MACSEC_PRIO, RDMA_TX_BYPASS_PRIO, }; @@ -280,9 +292,13 @@ enum { #define RDMA_TX_IPSEC_PRIO_NUM_LEVELS 1 #define RDMA_TX_IPSEC_MIN_LEVEL (RDMA_TX_COUNTERS_MIN_LEVEL + RDMA_TX_IPSEC_NUM_PRIOS) +#define RDMA_TX_MACSEC_NUM_PRIOS 1 +#define RDMA_TX_MACSEC_PRIO_NUM_LEVELS 1 +#define RDMA_TX_MACSEC_MIN_LEVEL (RDMA_TX_COUNTERS_MIN_LEVEL + RDMA_TX_MACSEC_NUM_PRIOS) + static struct init_tree_node rdma_tx_root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 3, + .ar_size = 4, .children = (struct init_tree_node[]) { [RDMA_TX_COUNTERS_PRIO] = ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0, @@ -296,7 +312,12 @@ static struct init_tree_node rdma_tx_root_fs = { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(RDMA_TX_IPSEC_NUM_PRIOS, RDMA_TX_IPSEC_PRIO_NUM_LEVELS))), - + [RDMA_TX_MACSEC_PRIO] = + ADD_PRIO(0, RDMA_TX_MACSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_TX_MACSEC_NUM_PRIOS, + RDMA_TX_MACSEC_PRIO_NUM_LEVELS))), [RDMA_TX_BYPASS_PRIO] = ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0, FS_CHAINING_CAPS_RDMA_TX, @@ -889,7 +910,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root || root->type == FS_TYPE_PRIO_CHAINS) + if (!root) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -905,20 +926,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, return ft; } -/* If reverse is false then return the first flow table in next priority of - * prio in the tree, else return the last flow table in the previous priority - * of prio in the tree. +static struct fs_node *find_prio_chains_parent(struct fs_node *parent, + struct fs_node **child) +{ + struct fs_node *node = NULL; + + while (parent && parent->type != FS_TYPE_PRIO_CHAINS) { + node = parent; + parent = parent->parent; + } + + if (child) + *child = node; + + return parent; +} + +/* If reverse is false then return the first flow table next to the passed node + * in the tree, else return the last flow table before the node in the tree. + * If skip is true, skip the flow tables in the same prio_chains prio. 
*/ -static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse, + bool skip) { + struct fs_node *prio_chains_parent = NULL; struct mlx5_flow_table *ft = NULL; struct fs_node *curr_node; struct fs_node *parent; - parent = prio->node.parent; - curr_node = &prio->node; + if (skip) + prio_chains_parent = find_prio_chains_parent(node, NULL); + parent = node->parent; + curr_node = node; while (!ft && parent) { - ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + if (parent != prio_chains_parent) + ft = find_closest_ft_recursive(parent, &curr_node->list, + reverse); curr_node = parent; parent = curr_node->parent; } @@ -926,15 +969,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, false); + return find_closest_ft(node, false, true); } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, true); + return find_closest_ft(node, true, true); } static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, @@ -946,7 +989,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent); - return find_next_chained_ft(prio); + return find_next_chained_ft(&prio->node); } static int connect_fts_in_prio(struct mlx5_core_dev *dev, @@ -970,21 +1013,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev, return 0; } +static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node, + struct fs_node *parent, + struct fs_node **child, + bool reverse) +{ + struct mlx5_flow_table *ft; + + ft = find_closest_ft(node, reverse, false); + + if (ft && parent == find_prio_chains_parent(&ft->node, child)) + return ft; + + return NULL; +} + /* Connect flow tables from previous priority of prio to ft */ static int connect_prev_fts(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct fs_node *prio_parent, *parent = NULL, *child, *node; struct mlx5_flow_table *prev_ft; + int err = 0; - prev_ft = find_prev_chained_ft(prio); - if (prev_ft) { + prio_parent = find_prio_chains_parent(&prio->node, &child); + + /* return directly if not under the first sub ns of prio_chains prio */ + if (prio_parent && !list_is_first(&child->list, &prio_parent->children)) + return 0; + + prev_ft = find_prev_chained_ft(&prio->node); + while (prev_ft) { struct fs_prio *prev_prio; fs_get_obj(prev_prio, prev_ft->node.parent); - return connect_fts_in_prio(dev, prev_prio, ft); + err = connect_fts_in_prio(dev, prev_prio, ft); + if (err) + break; + + if (!parent) { + parent = find_prio_chains_parent(&prev_prio->node, &child); + if (!parent) + break; + } + + node = child; + prev_ft = find_closet_ft_prio_chains(node, parent, &child, true); } - return 0; + return err; } static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio @@ -1066,7 +1143,7 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle, } for (i = 0; i < 
handle->num_rules; i++) { - if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr)) + if (mlx5_flow_dests_cmp(old_dest, &handle->rule[i]->dest_attr)) return _mlx5_modify_rule_destination(handle->rule[i], new_dest); } @@ -1123,7 +1200,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table if (err) return err; - next_ft = first_ft ? first_ft : find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -1198,7 +1275,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); next_ft = unmanaged ? ft_attr->next_ft : - find_next_chained_ft(fs_prio); + find_next_chained_ft(&fs_prio->node); ft->def_miss_action = ns->def_miss_action; ft->ns = ns; err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft); @@ -2195,13 +2272,20 @@ EXPORT_SYMBOL(mlx5_del_flow_rules); /* Assuming prio->node.children(flow tables) is sorted by level */ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) { + struct fs_node *prio_parent, *child; struct fs_prio *prio; fs_get_obj(prio, ft->node.parent); if (!list_is_last(&ft->node.list, &prio->node.children)) return list_next_entry(ft, node.list); - return find_next_chained_ft(prio); + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + if (prio_parent && list_is_first(&child->list, &prio_parent->children)) + return find_closest_ft(&prio->node, false, false); + + return find_next_chained_ft(&prio->node); } static int update_root_ft_destroy(struct mlx5_flow_table *ft) @@ -2403,6 +2487,14 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, root_ns = steering->rdma_tx_root_ns; prio = RDMA_TX_IPSEC_PRIO; break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_MACSEC_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC: + root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_MACSEC_PRIO; + break; default: /* Must be NIC RX */ WARN_ON(!is_nic_rx_ns(type)); root_ns = steering->root_ns; @@ -2987,6 +3079,12 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) if (err) goto out_err; + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_CRYPTO_INGRESS, 3); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + err = create_fdb_fast_path(steering); if (err) goto out_err; @@ -3009,6 +3107,12 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) goto out_err; } + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_CRYPTO_EGRESS, 3); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + /* We put this priority last, knowing that nothing will get here * unless explicitly forwarded to. This is possible because the * slow path tables have catch all rules and nothing gets passed @@ -3621,7 +3725,7 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx) + u16 peer_vhca_id) { if (peer_ns && ns->mode != peer_ns->mode) { mlx5_core_err(ns->dev, @@ -3629,7 +3733,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, return -EINVAL; } - return ns->cmds->set_peer(ns, peer_ns, peer_idx); + return ns->cmds->set_peer(ns, peer_ns, peer_vhca_id); } /* This function should be called only at init stage of the namespace. 
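Note on the fs_core.c chaining changes above: they hinge on one small traversal, find_prio_chains_parent(), which walks up from a node until it reaches the first FS_TYPE_PRIO_CHAINS ancestor, recording the last node passed on the way. Below is a minimal stand-alone C sketch of that walk; the types and names (node, NODE_PRIO_CHAINS, find_chains_parent) are simplified stand-ins for illustration, not the driver's definitions.

#include <stdio.h>
#include <stddef.h>

enum node_type { NODE_NS, NODE_PRIO, NODE_PRIO_CHAINS };

struct node {
	enum node_type type;
	struct node *parent;
};

/* Walk up from @n until the first prio_chains ancestor (or past the
 * root). If @child is non-NULL, it receives the last node seen below
 * that ancestor, mirroring the driver's out-parameter.
 */
static struct node *find_chains_parent(struct node *n, struct node **child)
{
	struct node *below = NULL;

	while (n && n->type != NODE_PRIO_CHAINS) {
		below = n;
		n = n->parent;
	}
	if (child)
		*child = below;
	return n;
}

int main(void)
{
	struct node root   = { NODE_NS, NULL };
	struct node chains = { NODE_PRIO_CHAINS, &root };
	struct node prio   = { NODE_PRIO, &chains };
	struct node *child;

	printf("%s\n", find_chains_parent(&prio, &child) == &chains ?
	       "chains ancestor found, child is the prio below it" :
	       "no chains ancestor");
	return 0;
}

find_closest_ft() builds on this walk: with skip set it ignores flow tables under the same prio_chains parent, which is what lets connect_prev_fts() and find_next_ft() hop across the sub-namespaces of a prio_chains prio instead of stopping at its boundary.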
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 03e64c4c245d..4aed1768b85f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -303,7 +303,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx); + u16 peer_vhca_id); int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, enum mlx5_flow_steering_mode mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index fb2035a5ec99..58f4c0d0fafa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -143,90 +143,86 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { int err; - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); if (err) return err; if (MLX5_CAP_GEN(dev, port_selection_cap)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_PORT_SELECTION, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, hca_cap_2)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_GENERAL_2, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, eth_net_offloads)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ODP, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, atomic)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ATOMIC, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, roce)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ROCE, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, nic_flow_table) || MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_FLOW_TABLE, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_ESWITCH_MANAGER(dev)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); if (err) return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); - if (err) - return err; - } - - if (MLX5_CAP_GEN(dev, vector_calc)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ESWITCH, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, qos)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_QOS); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_QOS, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, debug)) - mlx5_core_get_caps(dev, MLX5_CAP_DEBUG); + mlx5_core_get_caps_mode(dev, MLX5_CAP_DEBUG, HCA_CAP_OPMOD_GET_CUR); if (MLX5_CAP_GEN(dev, 
pcam_reg)) mlx5_get_pcam_reg(dev); if (MLX5_CAP_GEN(dev, mcam_reg)) { mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128); - mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9080_0x90FF); mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F); } @@ -234,57 +230,52 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) mlx5_get_qcam_reg(dev); if (MLX5_CAP_GEN(dev, device_memory)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_DEV_MEM, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, event_cap)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_DEV_EVENT, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_TLS, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { - err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_VDPA_EMULATION, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, ipsec_offload)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_IPSEC); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPSEC, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, crypto)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_CRYPTO); - if (err) - return err; - } - - if (MLX5_CAP_GEN(dev, shampo)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_CRYPTO, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD) { - err = mlx5_core_get_caps(dev, MLX5_CAP_MACSEC); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_MACSEC, HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, adv_virtualization)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ADV_VIRTUALIZATION); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ADV_VIRTUALIZATION, + HCA_CAP_OPMOD_GET_CUR); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 4804990b7f22..e87766f91150 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -127,17 +127,23 @@ static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state)) goto out; + if (!reset_state) + return 0; + switch (reset_state) { case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION: case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS: - NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered"); + NL_SET_ERR_MSG_MOD(extack, "Sync reset still in progress"); return -EBUSY; - case MLX5_MFRL_REG_RESET_STATE_TIMEOUT: - NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout"); + case MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset negotiation timeout"); return -ETIMEDOUT; case MLX5_MFRL_REG_RESET_STATE_NACK: NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset"); return -EPERM; + case MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset unload timeout"); + return -ETIMEDOUT; } out: @@ -151,7 +157,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; u32 
out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; - int err; + int err, rst_res; set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); @@ -164,13 +170,34 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, return 0; clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); - if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) - return mlx5_fw_reset_get_reset_state_err(dev, extack); + if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) { + rst_res = mlx5_fw_reset_get_reset_state_err(dev, extack); + return rst_res ? rst_res : err; + } NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed"); return mlx5_cmd_check(dev, err, in, out); } +int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack) +{ + u8 rst_state; + int err; + + err = mlx5_fw_reset_get_reset_state_err(dev, extack); + if (err) + return err; + + rst_state = mlx5_get_fw_rst_state(dev); + if (!rst_state) + return 0; + + mlx5_core_err(dev, "Sync reset did not complete, state=%d\n", rst_state); + NL_SET_ERR_MSG_MOD(extack, "Sync reset did not complete successfully"); + return rst_state; +} + int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) { return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index c57465595f7c..ea527d06a85f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -12,6 +12,8 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack); void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); void mlx5_drain_fw_reset(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 187cb2c464f8..2fb2598b775e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -50,20 +50,6 @@ enum { }; enum { - MLX5_HEALTH_SYNDR_FW_ERR = 0x1, - MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7, - MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8, - MLX5_HEALTH_SYNDR_CRC_ERR = 0x9, - MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa, - MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb, - MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc, - MLX5_HEALTH_SYNDR_EQ_ERR = 0xd, - MLX5_HEALTH_SYNDR_EQ_INV = 0xe, - MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf, - MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10 -}; - -enum { MLX5_DROP_HEALTH_WORK, }; @@ -357,27 +343,27 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev) static const char *hsynd_str(u8 synd) { switch (synd) { - case MLX5_HEALTH_SYNDR_FW_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR: return "firmware internal error"; - case MLX5_HEALTH_SYNDR_IRISC_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC: return "irisc not responding"; - case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR: return "unrecoverable hardware error"; - case MLX5_HEALTH_SYNDR_CRC_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR: return "firmware CRC error"; - case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: + case 
MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR: return "ICM fetch PCI error"; - case MLX5_HEALTH_SYNDR_HW_FTL_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR: return "HW fatal error\n"; - case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN: return "async EQ buffer overrun"; - case MLX5_HEALTH_SYNDR_EQ_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR: return "EQ error"; - case MLX5_HEALTH_SYNDR_EQ_INV: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV: return "Invalid EQ referenced"; - case MLX5_HEALTH_SYNDR_FFSER_ERR: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR: return "FFSER error"; - case MLX5_HEALTH_SYNDR_HIGH_TEMP: + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR: return "High temperature"; default: return "unrecognized error"; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c new file mode 100644 index 000000000000..353f81dccd1c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved + +#include <linux/hwmon.h> +#include <linux/bitmap.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/port.h> +#include "mlx5_core.h" +#include "hwmon.h" + +#define CHANNELS_TYPE_NUM 2 /* chip channel and temp channel */ +#define CHIP_CONFIG_NUM 1 + +/* module 0 is mapped to sensor_index 64 in MTMP register */ +#define to_mtmp_module_sensor_idx(idx) (64 + (idx)) + +/* All temperatures retrieved in units of 0.125C. hwmon framework expect + * it in units of millidegrees C. Hence multiply values by 125. + */ +#define mtmp_temp_to_mdeg(temp) ((temp) * 125) + +struct temp_channel_desc { + u32 sensor_index; + char sensor_name[32]; +}; + +/* chip_channel_config and channel_info arrays must be 0-terminated, hence + 1 */ +struct mlx5_hwmon { + struct mlx5_core_dev *mdev; + struct device *hwmon_dev; + struct hwmon_channel_info chip_info; + u32 chip_channel_config[CHIP_CONFIG_NUM + 1]; + struct hwmon_channel_info temp_info; + u32 *temp_channel_config; + const struct hwmon_channel_info *channel_info[CHANNELS_TYPE_NUM + 1]; + struct hwmon_chip_info chip; + struct temp_channel_desc *temp_channel_desc; + u32 asic_platform_scount; + u32 module_scount; +}; + +static int mlx5_hwmon_query_mtmp(struct mlx5_core_dev *mdev, u32 sensor_index, u32 *mtmp_out) +{ + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + + MLX5_SET(mtmp_reg, mtmp_in, sensor_index, sensor_index); + + return mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, MLX5_ST_SZ_BYTES(mtmp_reg), + MLX5_REG_MTMP, 0, 0); +} + +static int mlx5_hwmon_reset_max_temp(struct mlx5_core_dev *mdev, int sensor_index) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + + MLX5_SET(mtmp_reg, mtmp_in, sensor_index, sensor_index); + MLX5_SET(mtmp_reg, mtmp_in, mtr, 1); + + return mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, sizeof(mtmp_out), + MLX5_REG_MTMP, 0, 0); +} + +static int mlx5_hwmon_enable_max_temp(struct mlx5_core_dev *mdev, int sensor_index) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + int err; + + err = mlx5_hwmon_query_mtmp(mdev, sensor_index, mtmp_in); + if (err) + return err; + + MLX5_SET(mtmp_reg, mtmp_in, mte, 1); + return mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, 
sizeof(mtmp_out), + MLX5_REG_MTMP, 0, 1); +} + +static int mlx5_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) +{ + struct mlx5_hwmon *hwmon = dev_get_drvdata(dev); + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + int err; + + if (type != hwmon_temp) + return -EOPNOTSUPP; + + err = mlx5_hwmon_query_mtmp(hwmon->mdev, hwmon->temp_channel_desc[channel].sensor_index, + mtmp_out); + if (err) + return err; + + switch (attr) { + case hwmon_temp_input: + *val = mtmp_temp_to_mdeg(MLX5_GET(mtmp_reg, mtmp_out, temperature)); + return 0; + case hwmon_temp_highest: + *val = mtmp_temp_to_mdeg(MLX5_GET(mtmp_reg, mtmp_out, max_temperature)); + return 0; + case hwmon_temp_crit: + *val = mtmp_temp_to_mdeg(MLX5_GET(mtmp_reg, mtmp_out, temp_threshold_hi)); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlx5_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long val) +{ + struct mlx5_hwmon *hwmon = dev_get_drvdata(dev); + + if (type != hwmon_temp || attr != hwmon_temp_reset_history) + return -EOPNOTSUPP; + + return mlx5_hwmon_reset_max_temp(hwmon->mdev, + hwmon->temp_channel_desc[channel].sensor_index); +} + +static umode_t mlx5_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, + int channel) +{ + if (type != hwmon_temp) + return 0; + + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_highest: + case hwmon_temp_crit: + case hwmon_temp_label: + return 0444; + case hwmon_temp_reset_history: + return 0200; + default: + return 0; + } +} + +static int mlx5_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + struct mlx5_hwmon *hwmon = dev_get_drvdata(dev); + + if (type != hwmon_temp || attr != hwmon_temp_label) + return -EOPNOTSUPP; + + *str = (const char *)hwmon->temp_channel_desc[channel].sensor_name; + return 0; +} + +static const struct hwmon_ops mlx5_hwmon_ops = { + .read = mlx5_hwmon_read, + .read_string = mlx5_hwmon_read_string, + .is_visible = mlx5_hwmon_is_visible, + .write = mlx5_hwmon_write, +}; + +static int mlx5_hwmon_init_channels_names(struct mlx5_hwmon *hwmon) +{ + u32 i; + + for (i = 0; i < hwmon->asic_platform_scount + hwmon->module_scount; i++) { + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + char *sensor_name; + int err; + + err = mlx5_hwmon_query_mtmp(hwmon->mdev, hwmon->temp_channel_desc[i].sensor_index, + mtmp_out); + if (err) + return err; + + sensor_name = MLX5_ADDR_OF(mtmp_reg, mtmp_out, sensor_name_hi); + if (!*sensor_name) { + snprintf(hwmon->temp_channel_desc[i].sensor_name, + sizeof(hwmon->temp_channel_desc[i].sensor_name), "sensor%u", + hwmon->temp_channel_desc[i].sensor_index); + continue; + } + + memcpy(&hwmon->temp_channel_desc[i].sensor_name, sensor_name, + MLX5_FLD_SZ_BYTES(mtmp_reg, sensor_name_hi) + + MLX5_FLD_SZ_BYTES(mtmp_reg, sensor_name_lo)); + } + + return 0; +} + +static int mlx5_hwmon_get_module_sensor_index(struct mlx5_core_dev *mdev, u32 *module_index) +{ + int module_num; + int err; + + err = mlx5_query_module_num(mdev, &module_num); + if (err) + return err; + + *module_index = to_mtmp_module_sensor_idx(module_num); + + return 0; +} + +static int mlx5_hwmon_init_sensors_indexes(struct mlx5_hwmon *hwmon, u64 sensor_map) +{ + DECLARE_BITMAP(smap, BITS_PER_TYPE(sensor_map)); + unsigned long bit_pos; + int err = 0; + int i = 0; + + bitmap_from_u64(smap, sensor_map); + + for_each_set_bit(bit_pos, smap, BITS_PER_TYPE(sensor_map)) { + hwmon->temp_channel_desc[i].sensor_index 
= bit_pos; + i++; + } + + if (hwmon->module_scount) + err = mlx5_hwmon_get_module_sensor_index(hwmon->mdev, + &hwmon->temp_channel_desc[i].sensor_index); + + return err; +} + +static void mlx5_hwmon_channel_info_init(struct mlx5_hwmon *hwmon) +{ + int i; + + hwmon->channel_info[0] = &hwmon->chip_info; + hwmon->channel_info[1] = &hwmon->temp_info; + + hwmon->chip_channel_config[0] = HWMON_C_REGISTER_TZ; + hwmon->chip_info.config = (const u32 *)hwmon->chip_channel_config; + hwmon->chip_info.type = hwmon_chip; + + for (i = 0; i < hwmon->asic_platform_scount + hwmon->module_scount; i++) + hwmon->temp_channel_config[i] = HWMON_T_INPUT | HWMON_T_HIGHEST | HWMON_T_CRIT | + HWMON_T_RESET_HISTORY | HWMON_T_LABEL; + + hwmon->temp_info.config = (const u32 *)hwmon->temp_channel_config; + hwmon->temp_info.type = hwmon_temp; +} + +static int mlx5_hwmon_is_module_mon_cap(struct mlx5_core_dev *mdev, bool *mon_cap) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)]; + u32 module_index; + int err; + + err = mlx5_hwmon_get_module_sensor_index(mdev, &module_index); + if (err) + return err; + + err = mlx5_hwmon_query_mtmp(mdev, module_index, mtmp_out); + if (err) + return err; + + if (MLX5_GET(mtmp_reg, mtmp_out, temperature)) + *mon_cap = true; + + return 0; +} + +static int mlx5_hwmon_get_sensors_count(struct mlx5_core_dev *mdev, u32 *asic_platform_scount) +{ + u32 mtcap_out[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + u32 mtcap_in[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, mtcap_in, sizeof(mtcap_in), + mtcap_out, sizeof(mtcap_out), + MLX5_REG_MTCAP, 0, 0); + if (err) + return err; + + *asic_platform_scount = MLX5_GET(mtcap_reg, mtcap_out, sensor_count); + + return 0; +} + +static void mlx5_hwmon_free(struct mlx5_hwmon *hwmon) +{ + if (!hwmon) + return; + + kfree(hwmon->temp_channel_config); + kfree(hwmon->temp_channel_desc); + kfree(hwmon); +} + +static struct mlx5_hwmon *mlx5_hwmon_alloc(struct mlx5_core_dev *mdev) +{ + struct mlx5_hwmon *hwmon; + bool mon_cap = false; + u32 sensors_count; + int err; + + hwmon = kzalloc(sizeof(*mdev->hwmon), GFP_KERNEL); + if (!hwmon) + return ERR_PTR(-ENOMEM); + + err = mlx5_hwmon_get_sensors_count(mdev, &hwmon->asic_platform_scount); + if (err) + goto err_free_hwmon; + + /* check if module sensor has thermal mon cap. if yes, allocate channel desc for it */ + err = mlx5_hwmon_is_module_mon_cap(mdev, &mon_cap); + if (err) + goto err_free_hwmon; + + hwmon->module_scount = mon_cap ? 
1 : 0; + sensors_count = hwmon->asic_platform_scount + hwmon->module_scount; + hwmon->temp_channel_desc = kcalloc(sensors_count, sizeof(*hwmon->temp_channel_desc), + GFP_KERNEL); + if (!hwmon->temp_channel_desc) { + err = -ENOMEM; + goto err_free_hwmon; + } + + /* sensors configuration values array, must be 0-terminated hence, + 1 */ + hwmon->temp_channel_config = kcalloc(sensors_count + 1, sizeof(*hwmon->temp_channel_config), + GFP_KERNEL); + if (!hwmon->temp_channel_config) { + err = -ENOMEM; + goto err_free_temp_channel_desc; + } + + hwmon->mdev = mdev; + + return hwmon; + +err_free_temp_channel_desc: + kfree(hwmon->temp_channel_desc); +err_free_hwmon: + kfree(hwmon); + return ERR_PTR(err); +} + +static int mlx5_hwmon_dev_init(struct mlx5_hwmon *hwmon) +{ + u32 mtcap_out[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + u32 mtcap_in[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + int err; + int i; + + err = mlx5_core_access_reg(hwmon->mdev, mtcap_in, sizeof(mtcap_in), + mtcap_out, sizeof(mtcap_out), + MLX5_REG_MTCAP, 0, 0); + if (err) + return err; + + mlx5_hwmon_channel_info_init(hwmon); + mlx5_hwmon_init_sensors_indexes(hwmon, MLX5_GET64(mtcap_reg, mtcap_out, sensor_map)); + err = mlx5_hwmon_init_channels_names(hwmon); + if (err) + return err; + + for (i = 0; i < hwmon->asic_platform_scount + hwmon->module_scount; i++) { + err = mlx5_hwmon_enable_max_temp(hwmon->mdev, + hwmon->temp_channel_desc[i].sensor_index); + if (err) + return err; + } + + hwmon->chip.ops = &mlx5_hwmon_ops; + hwmon->chip.info = (const struct hwmon_channel_info **)hwmon->channel_info; + + return 0; +} + +int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev) +{ + struct device *dev = mdev->device; + struct mlx5_hwmon *hwmon; + int err; + + if (!MLX5_CAP_MCAM_REG(mdev, mtmp)) + return 0; + + hwmon = mlx5_hwmon_alloc(mdev); + if (IS_ERR(hwmon)) + return PTR_ERR(hwmon); + + err = mlx5_hwmon_dev_init(hwmon); + if (err) + goto err_free_hwmon; + + hwmon->hwmon_dev = hwmon_device_register_with_info(dev, "mlx5", + hwmon, + &hwmon->chip, + NULL); + if (IS_ERR(hwmon->hwmon_dev)) { + err = PTR_ERR(hwmon->hwmon_dev); + goto err_free_hwmon; + } + + mdev->hwmon = hwmon; + return 0; + +err_free_hwmon: + mlx5_hwmon_free(hwmon); + return err; +} + +void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev) +{ + struct mlx5_hwmon *hwmon = mdev->hwmon; + + if (!hwmon) + return; + + hwmon_device_unregister(hwmon->hwmon_dev); + mlx5_hwmon_free(hwmon); + mdev->hwmon = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h new file mode 100644 index 000000000000..999654a9b9da --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved + */ +#ifndef __MLX5_HWMON_H__ +#define __MLX5_HWMON_H__ + +#include <linux/mlx5/driver.h> + +#if IS_ENABLED(CONFIG_HWMON) + +int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev); +void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev); + +#else +static inline int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev) {} + +#endif + +#endif /* __MLX5_HWMON_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index fa467335526e..047d5fed5f89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -156,67 +156,57 @@ unlock: return least_loaded_irq; } -void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, - int num_irqs) +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); - int i; - - for (i = 0; i < num_irqs; i++) { - int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i])); + int cpu; - synchronize_irq(pci_irq_vector(pool->dev->pdev, - mlx5_irq_get_index(irqs[i]))); - if (mlx5_irq_put(irqs[i])) - if (pool->irqs_per_cpu) - cpu_put(pool, cpu); - } + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); + synchronize_irq(pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(irq))); + if (mlx5_irq_put(irq)) + if (pool->irqs_per_cpu) + cpu_put(pool, cpu); } /** - * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device. - * @dev: mlx5 device that is requesting the IRQs. - * @nirqs: number of IRQs to request. - * @irqs: an output array of IRQs pointers. + * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device. + * @dev: mlx5 device that is requesting the IRQ. + * @used_cpus: cpumask of bounded cpus by the device + * @vecidx: vector index to request an IRQ for. * * Each IRQ is bounded to at most 1 CPU. - * This function is requesting IRQs according to the default assignment. + * This function is requesting an IRQ according to the default assignment. * The default assignment policy is: - * - in each iteration, request the least loaded IRQ which is not bound to any + * - request the least loaded IRQ which is not bound to any * CPU of the previous IRQs requested. * - * This function returns the number of IRQs requested, (which might be smaller than - * @nirqs), if successful, or a negative error code in case of an error. + * On success, this function updates used_cpus mask and returns an irq pointer. + * In case of an error, an appropriate error pointer is returned. */ -int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, - struct mlx5_irq **irqs) +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx) { struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; - int i = 0; + + if (!mlx5_irq_pool_is_sf_pool(pool)) + return ERR_PTR(-ENOENT); af_desc.is_managed = 1; cpumask_copy(&af_desc.mask, cpu_online_mask); - for (i = 0; i < nirqs; i++) { - if (mlx5_irq_pool_is_sf_pool(pool)) - irq = mlx5_irq_affinity_request(pool, &af_desc); - else - /* In case SF pool doesn't exists, fallback to the PF IRQs. - * The PF IRQs are already allocated and binded to CPU - * at this point. Hence, only an index is needed. 
- */ - irq = mlx5_irq_request(dev, i, NULL, NULL); - if (IS_ERR(irq)) - break; - irqs[i] = irq; - cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), &af_desc.mask); - mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", - pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), - cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), - mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); - } - if (!i) - return PTR_ERR(irq); - return i; + cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus); + irq = mlx5_irq_affinity_request(pool, &af_desc); + + if (IS_ERR(irq)) + return irq; + + cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq)); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + + return irq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index f0a074b2fcdf..af3fac090b82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -835,7 +835,7 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) dev = ldev->pf[MLX5_LAG_P1].dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && - mlx5_devcom_comp_is_ready(dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS) && + mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) && MLX5_CAP_ESW(dev, esw_shared_ingress_acl) && mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1) return true; @@ -1268,16 +1268,6 @@ recheck: mlx5_ldev_put(ldev); } -bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) -{ - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - MLX5_CAP_GEN(dev, num_lag_ports) < 2 || - MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) - return false; - return true; -} - void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index a061b1873e27..481e92f39fe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -74,8 +74,6 @@ struct mlx5_lag { struct lag_mpesw lag_mpesw; }; -bool mlx5_lag_is_supported(struct mlx5_core_dev *dev); - static inline struct mlx5_lag * mlx5_lag_dev(struct mlx5_core_dev *dev) { @@ -115,4 +113,14 @@ void mlx5_lag_remove_devices(struct mlx5_lag *ldev); int mlx5_deactivate_lag(struct mlx5_lag *ldev); void mlx5_lag_add_devices(struct mlx5_lag *ldev); +static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) + return false; + return true; +} + #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index d3a3fe4ce670..7d9bbb494d95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, for (i = 0; i < ldev->ports; i++) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; - if (ldev->v2p_map[i] == 
ports[i]) + if (ldev->v2p_map[idx] == ports[idx]) continue; dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index 5a80fb7dbbca..40c7be124041 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -81,7 +81,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) int inlen, eqn; int err; - err = mlx5_vector2eqn(mdev, 0, &eqn); + err = mlx5_comp_eqn_get(mdev, 0, &eqn); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 973babfaff25..aa29f09e8356 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -32,6 +32,7 @@ #include <linux/clocksource.h> #include <linux/highmem.h> +#include <linux/log2.h> #include <linux/ptp_clock_kernel.h> #include <rdma/mlx5-abi.h> #include "lib/eq.h" @@ -39,10 +40,6 @@ #include "clock.h" enum { - MLX5_CYCLES_SHIFT = 31 -}; - -enum { MLX5_PIN_MODE_IN = 0x0, MLX5_PIN_MODE_OUT = 0x1, }; @@ -93,6 +90,31 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } +static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) +{ + /* Optimal shift constant leads to corrections above just 1 scaled ppm. + * + * Two sets of equations are needed to derive the optimal shift + * constant for the cyclecounter. + * + * dev_freq_khz * 1000 / 2^shift_constant = 1 scaled_ppm + * ppb = scaled_ppm * 1000 / 2^16 + * + * Using the two equations together + * + * dev_freq_khz * 1000 / 1 scaled_ppm = 2^shift_constant + * dev_freq_khz * 2^16 / 1 ppb = 2^shift_constant + * dev_freq_khz = 2^(shift_constant - 16) + * + * then yields + * + * shift_constant = ilog2(dev_freq_khz) + 16 + */ + + return min(ilog2(dev_freq_khz) + 16, + ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); +} + static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); @@ -227,10 +249,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work) clock = container_of(timer, struct mlx5_clock, timer); mdev = container_of(clock, struct mlx5_core_dev, clock); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; + write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&timer->tc); mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); + +out: schedule_delayed_work(&timer->overflow_work, timer->overflow_period); } @@ -904,7 +931,7 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); timer->cycles.read = read_internal_timer; - timer->cycles.shift = MLX5_CYCLES_SHIFT; + timer->cycles.shift = mlx5_ptp_shift_constant(dev_freq); timer->cycles.mult = clocksource_khz2mult(dev_freq, timer->cycles.shift); timer->nominal_c_mult = timer->cycles.mult; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 78c94b22bdc0..00e67910e3ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -2,214 +2,274 @@ /* Copyright (c) 2018 Mellanox Technologies */ #include <linux/mlx5/vport.h> +#include <linux/list.h> #include "lib/devcom.h" #include "mlx5_core.h" -static 
LIST_HEAD(devcom_list); +static LIST_HEAD(devcom_dev_list); +static LIST_HEAD(devcom_comp_list); +/* protect device list */ +static DEFINE_MUTEX(dev_list_lock); +/* protect component list */ +static DEFINE_MUTEX(comp_list_lock); -#define devcom_for_each_component(priv, comp, iter) \ - for (iter = 0; \ - comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \ - iter++) +#define devcom_for_each_component(iter) \ + list_for_each_entry(iter, &devcom_comp_list, comp_list) -struct mlx5_devcom_component { - struct { - void __rcu *data; - } device[MLX5_DEVCOM_PORTS_SUPPORTED]; +struct mlx5_devcom_dev { + struct list_head list; + struct mlx5_core_dev *dev; + struct kref ref; +}; +struct mlx5_devcom_comp { + struct list_head comp_list; + enum mlx5_devcom_component id; + u64 key; + struct list_head comp_dev_list_head; mlx5_devcom_event_handler_t handler; - struct rw_semaphore sem; + struct kref ref; bool ready; + struct rw_semaphore sem; }; -struct mlx5_devcom_list { +struct mlx5_devcom_comp_dev { struct list_head list; - - struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; - struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED]; + struct mlx5_devcom_comp *comp; + struct mlx5_devcom_dev *devc; + void __rcu *data; }; -struct mlx5_devcom { - struct mlx5_devcom_list *priv; - int idx; -}; - -static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void) +static bool devcom_dev_exists(struct mlx5_core_dev *dev) { - struct mlx5_devcom_component *comp; - struct mlx5_devcom_list *priv; - int i; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return NULL; + struct mlx5_devcom_dev *iter; - devcom_for_each_component(priv, comp, i) - init_rwsem(&comp->sem); + list_for_each_entry(iter, &devcom_dev_list, list) + if (iter->dev == dev) + return true; - return priv; + return false; } -static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv, - u8 idx) +static struct mlx5_devcom_dev * +mlx5_devcom_dev_alloc(struct mlx5_core_dev *dev) { - struct mlx5_devcom *devcom; + struct mlx5_devcom_dev *devc; - devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); - if (!devcom) + devc = kzalloc(sizeof(*devc), GFP_KERNEL); + if (!devc) return NULL; - devcom->priv = priv; - devcom->idx = idx; - return devcom; + devc->dev = dev; + kref_init(&devc->ref); + return devc; } -/* Must be called with intf_mutex held */ -struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) +struct mlx5_devcom_dev * +mlx5_devcom_register_device(struct mlx5_core_dev *dev) { - struct mlx5_devcom_list *priv = NULL, *iter; - struct mlx5_devcom *devcom = NULL; - bool new_priv = false; - u64 sguid0, sguid1; - int idx, i; - - if (!mlx5_core_is_pf(dev)) - return NULL; - if (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_DEVCOM_PORTS_SUPPORTED) - return NULL; - - mlx5_dev_list_lock(); - sguid0 = mlx5_query_nic_system_image_guid(dev); - list_for_each_entry(iter, &devcom_list, list) { - /* There is at least one device in iter */ - struct mlx5_core_dev *tmp_dev; - - idx = -1; - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { - if (iter->devs[i]) - tmp_dev = iter->devs[i]; - else - idx = i; - } - - if (idx == -1) - continue; - - sguid1 = mlx5_query_nic_system_image_guid(tmp_dev); - if (sguid0 != sguid1) - continue; - - priv = iter; - break; - } + struct mlx5_devcom_dev *devc; - if (!priv) { - priv = mlx5_devcom_list_alloc(); - if (!priv) { - devcom = ERR_PTR(-ENOMEM); - goto out; - } + mutex_lock(&dev_list_lock); - idx = 0; - new_priv = true; + if (devcom_dev_exists(dev)) { + devc = 
ERR_PTR(-EEXIST); + goto out; } - priv->devs[idx] = dev; - devcom = mlx5_devcom_alloc(priv, idx); - if (!devcom) { - if (new_priv) - kfree(priv); - devcom = ERR_PTR(-ENOMEM); + devc = mlx5_devcom_dev_alloc(dev); + if (!devc) { + devc = ERR_PTR(-ENOMEM); goto out; } - if (new_priv) - list_add(&priv->list, &devcom_list); + list_add_tail(&devc->list, &devcom_dev_list); out: - mlx5_dev_list_unlock(); - return devcom; + mutex_unlock(&dev_list_lock); + return devc; } -/* Must be called with intf_mutex held */ -void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) +static void +mlx5_devcom_dev_release(struct kref *ref) { - struct mlx5_devcom_list *priv; - int i; + struct mlx5_devcom_dev *devc = container_of(ref, struct mlx5_devcom_dev, ref); - if (IS_ERR_OR_NULL(devcom)) - return; + mutex_lock(&dev_list_lock); + list_del(&devc->list); + mutex_unlock(&dev_list_lock); + kfree(devc); +} - mlx5_dev_list_lock(); - priv = devcom->priv; - priv->devs[devcom->idx] = NULL; +void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc) +{ + if (!IS_ERR_OR_NULL(devc)) + kref_put(&devc->ref, mlx5_devcom_dev_release); +} - kfree(devcom); +static struct mlx5_devcom_comp * +mlx5_devcom_comp_alloc(u64 id, u64 key, mlx5_devcom_event_handler_t handler) +{ + struct mlx5_devcom_comp *comp; - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) - if (priv->devs[i]) - break; + comp = kzalloc(sizeof(*comp), GFP_KERNEL); + if (!comp) + return ERR_PTR(-ENOMEM); - if (i != MLX5_DEVCOM_PORTS_SUPPORTED) - goto out; + comp->id = id; + comp->key = key; + comp->handler = handler; + init_rwsem(&comp->sem); + kref_init(&comp->ref); + INIT_LIST_HEAD(&comp->comp_dev_list_head); - list_del(&priv->list); - kfree(priv); -out: - mlx5_dev_list_unlock(); + return comp; } -void mlx5_devcom_register_component(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - mlx5_devcom_event_handler_t handler, - void *data) +static void +mlx5_devcom_comp_release(struct kref *ref) { - struct mlx5_devcom_component *comp; + struct mlx5_devcom_comp *comp = container_of(ref, struct mlx5_devcom_comp, ref); - if (IS_ERR_OR_NULL(devcom)) - return; + mutex_lock(&comp_list_lock); + list_del(&comp->comp_list); + mutex_unlock(&comp_list_lock); + kfree(comp); +} + +static struct mlx5_devcom_comp_dev * +devcom_alloc_comp_dev(struct mlx5_devcom_dev *devc, + struct mlx5_devcom_comp *comp, + void *data) +{ + struct mlx5_devcom_comp_dev *devcom; - WARN_ON(!data); + devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); + if (!devcom) + return ERR_PTR(-ENOMEM); + + kref_get(&devc->ref); + devcom->devc = devc; + devcom->comp = comp; + rcu_assign_pointer(devcom->data, data); - comp = &devcom->priv->components[id]; down_write(&comp->sem); - comp->handler = handler; - rcu_assign_pointer(comp->device[devcom->idx].data, data); + list_add_tail(&devcom->list, &comp->comp_dev_list_head); up_write(&comp->sem); + + return devcom; } -void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +static void +devcom_free_comp_dev(struct mlx5_devcom_comp_dev *devcom) { - struct mlx5_devcom_component *comp; - - if (IS_ERR_OR_NULL(devcom)) - return; + struct mlx5_devcom_comp *comp = devcom->comp; - comp = &devcom->priv->components[id]; down_write(&comp->sem); - RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL); + list_del(&devcom->list); up_write(&comp->sem); - synchronize_rcu(); + + kref_put(&devcom->devc->ref, mlx5_devcom_dev_release); + kfree(devcom); + kref_put(&comp->ref, mlx5_devcom_comp_release); } -int 
mlx5_devcom_send_event(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, +static bool +devcom_component_equal(struct mlx5_devcom_comp *devcom, + enum mlx5_devcom_component id, + u64 key) +{ + return devcom->id == id && devcom->key == key; +} + +static struct mlx5_devcom_comp * +devcom_component_get(struct mlx5_devcom_dev *devc, + enum mlx5_devcom_component id, + u64 key, + mlx5_devcom_event_handler_t handler) +{ + struct mlx5_devcom_comp *comp; + + devcom_for_each_component(comp) { + if (devcom_component_equal(comp, id, key)) { + if (handler == comp->handler) { + kref_get(&comp->ref); + return comp; + } + + mlx5_core_err(devc->dev, + "Cannot register existing devcom component with different handler\n"); + return ERR_PTR(-EINVAL); + } + } + + return NULL; +} + +struct mlx5_devcom_comp_dev * +mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, + enum mlx5_devcom_component id, + u64 key, + mlx5_devcom_event_handler_t handler, + void *data) +{ + struct mlx5_devcom_comp_dev *devcom; + struct mlx5_devcom_comp *comp; + + if (IS_ERR_OR_NULL(devc)) + return NULL; + + mutex_lock(&comp_list_lock); + comp = devcom_component_get(devc, id, key, handler); + if (IS_ERR(comp)) { + devcom = ERR_PTR(-EINVAL); + goto out_unlock; + } + + if (!comp) { + comp = mlx5_devcom_comp_alloc(id, key, handler); + if (IS_ERR(comp)) { + devcom = ERR_CAST(comp); + goto out_unlock; + } + list_add_tail(&comp->comp_list, &devcom_comp_list); + } + mutex_unlock(&comp_list_lock); + + devcom = devcom_alloc_comp_dev(devc, comp, data); + if (IS_ERR(devcom)) + kref_put(&comp->ref, mlx5_devcom_comp_release); + + return devcom; + +out_unlock: + mutex_unlock(&comp_list_lock); + return devcom; +} + +void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom) +{ + if (!IS_ERR_OR_NULL(devcom)) + devcom_free_comp_dev(devcom); +} + +int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom, int event, int rollback_event, void *event_data) { - struct mlx5_devcom_component *comp; - int err = -ENODEV, i; + struct mlx5_devcom_comp_dev *pos; + struct mlx5_devcom_comp *comp; + int err = 0; + void *data; if (IS_ERR_OR_NULL(devcom)) - return err; + return -ENODEV; - comp = &devcom->priv->components[id]; + comp = devcom->comp; down_write(&comp->sem); - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { - void *data = rcu_dereference_protected(comp->device[i].data, - lockdep_is_held(&comp->sem)); + list_for_each_entry(pos, &comp->comp_dev_list_head, list) { + data = rcu_dereference_protected(pos->data, lockdep_is_held(&comp->sem)); - if (i != devcom->idx && data) { + if (pos != devcom && data) { err = comp->handler(event, data, event_data); if (err) goto rollback; @@ -220,48 +280,43 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, return 0; rollback: - while (i--) { - void *data = rcu_dereference_protected(comp->device[i].data, - lockdep_is_held(&comp->sem)); + if (list_entry_is_head(pos, &comp->comp_dev_list_head, list)) + goto out; + pos = list_prev_entry(pos, list); + list_for_each_entry_from_reverse(pos, &comp->comp_dev_list_head, list) { + data = rcu_dereference_protected(pos->data, lockdep_is_held(&comp->sem)); - if (i != devcom->idx && data) + if (pos != devcom && data) comp->handler(rollback_event, data, event_data); } - +out: up_write(&comp->sem); return err; } -void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - bool ready) +void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready) { - struct mlx5_devcom_component *comp; - - 
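
Taken together, the reworked devcom API above is used roughly as follows. This is a minimal sketch: my_event_handler, my_setup, MY_EVENT and MY_ROLLBACK_EVENT are illustrative names, not symbols from this patch; the key is typically the NIC system image GUID.

static int my_event_handler(int event, void *my_data, void *event_data)
{
	/* invoked once for every *other* device on the same component */
	return 0;
}

static int my_setup(struct mlx5_core_dev *dev, void *my_data)
{
	struct mlx5_devcom_comp_dev *devcom;
	struct mlx5_devcom_dev *devc;

	devc = mlx5_devcom_register_device(dev);
	if (IS_ERR(devc))
		return PTR_ERR(devc);

	/* devices registering the same (id, key) pair join one component */
	devcom = mlx5_devcom_register_component(devc, MLX5_DEVCOM_ESW_OFFLOADS,
						mlx5_query_nic_system_image_guid(dev),
						my_event_handler, my_data);
	if (IS_ERR_OR_NULL(devcom))
		return -EINVAL;

	/* on a handler failure, peers already notified are walked back
	 * in reverse with MY_ROLLBACK_EVENT, per the rollback label above */
	return mlx5_devcom_send_event(devcom, MY_EVENT, MY_ROLLBACK_EVENT, NULL);
}
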
comp = &devcom->priv->components[id]; - WARN_ON(!rwsem_is_locked(&comp->sem)); + WARN_ON(!rwsem_is_locked(&devcom->comp->sem)); - WRITE_ONCE(comp->ready, ready); + WRITE_ONCE(devcom->comp->ready, ready); } -bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom) { if (IS_ERR_OR_NULL(devcom)) return false; - return READ_ONCE(devcom->priv->components[id].ready); + return READ_ONCE(devcom->comp->ready); } -bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom) { - struct mlx5_devcom_component *comp; + struct mlx5_devcom_comp *comp; if (IS_ERR_OR_NULL(devcom)) return false; - comp = &devcom->priv->components[id]; + comp = devcom->comp; down_read(&comp->sem); if (!READ_ONCE(comp->ready)) { up_read(&comp->sem); @@ -271,74 +326,60 @@ bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom, return true; } -void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom_comp_dev *devcom) { - struct mlx5_devcom_component *comp = &devcom->priv->components[id]; - - up_read(&comp->sem); + up_read(&devcom->comp->sem); } -void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - int *i) +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos) { - struct mlx5_devcom_component *comp; - void *ret; - int idx; + struct mlx5_devcom_comp *comp = devcom->comp; + struct mlx5_devcom_comp_dev *tmp; + void *data; - comp = &devcom->priv->components[id]; + tmp = list_prepare_entry(*pos, &comp->comp_dev_list_head, list); - if (*i == MLX5_DEVCOM_PORTS_SUPPORTED) - return NULL; - for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) { - if (idx != devcom->idx) { - ret = rcu_dereference_protected(comp->device[idx].data, - lockdep_is_held(&comp->sem)); - if (ret) + list_for_each_entry_continue(tmp, &comp->comp_dev_list_head, list) { + if (tmp != devcom) { + data = rcu_dereference_protected(tmp->data, lockdep_is_held(&comp->sem)); + if (data) break; } } - if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) { - *i = idx; + if (list_entry_is_head(tmp, &comp->comp_dev_list_head, list)) return NULL; - } - *i = idx + 1; - return ret; + *pos = tmp; + return data; } -void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - int *i) +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos) { - struct mlx5_devcom_component *comp; - void *ret; - int idx; + struct mlx5_devcom_comp *comp = devcom->comp; + struct mlx5_devcom_comp_dev *tmp; + void *data; - comp = &devcom->priv->components[id]; + tmp = list_prepare_entry(*pos, &comp->comp_dev_list_head, list); - if (*i == MLX5_DEVCOM_PORTS_SUPPORTED) - return NULL; - for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) { - if (idx != devcom->idx) { + list_for_each_entry_continue(tmp, &comp->comp_dev_list_head, list) { + if (tmp != devcom) { /* This can change concurrently, however 'data' pointer will remain * valid for the duration of RCU read section. 
*/ if (!READ_ONCE(comp->ready)) return NULL; - ret = rcu_dereference(comp->device[idx].data); - if (ret) + data = rcu_dereference(tmp->data); + if (data) break; } } - if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) { - *i = idx; + if (list_entry_is_head(tmp, &comp->comp_dev_list_head, list)) return NULL; - } - *i = idx + 1; - return ret; + *pos = tmp; + return data; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index d953a01b8eaa..8389ac0af708 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -6,11 +6,8 @@ #include <linux/mlx5/driver.h> -#define MLX5_DEVCOM_PORTS_SUPPORTED 4 - -enum mlx5_devcom_components { +enum mlx5_devcom_component { MLX5_DEVCOM_ESW_OFFLOADS, - MLX5_DEVCOM_NUM_COMPONENTS, }; @@ -18,45 +15,40 @@ typedef int (*mlx5_devcom_event_handler_t)(int event, void *my_data, void *event_data); -struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev); -void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom); +struct mlx5_devcom_dev *mlx5_devcom_register_device(struct mlx5_core_dev *dev); +void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc); -void mlx5_devcom_register_component(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - mlx5_devcom_event_handler_t handler, - void *data); -void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); +struct mlx5_devcom_comp_dev * +mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, + enum mlx5_devcom_component id, + u64 key, + mlx5_devcom_event_handler_t handler, + void *data); +void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom); -int mlx5_devcom_send_event(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, +int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom, int event, int rollback_event, void *event_data); -void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - bool ready); -bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); - -bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); -void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); -void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, int *i); - -#define mlx5_devcom_for_each_peer_entry(devcom, id, data, i) \ - for (i = 0, data = mlx5_devcom_get_next_peer_data(devcom, id, &i); \ - data; \ - data = mlx5_devcom_get_next_peer_data(devcom, id, &i)) - -void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, int *i); - -#define mlx5_devcom_for_each_peer_entry_rcu(devcom, id, data, i) \ - for (i = 0, data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i); \ - data; \ - data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i)) - -#endif +void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready); +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom); + +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom); +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom_comp_dev *devcom); +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos); + +#define mlx5_devcom_for_each_peer_entry(devcom, data, pos) \ + for (pos = NULL, data = 
mlx5_devcom_get_next_peer_data(devcom, &pos); \ + data; \ + data = mlx5_devcom_get_next_peer_data(devcom, &pos)) + +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos); + +#define mlx5_devcom_for_each_peer_entry_rcu(devcom, data, pos) \ + for (pos = NULL, data = mlx5_devcom_get_next_peer_data_rcu(devcom, &pos); \ + data; \ + data = mlx5_devcom_get_next_peer_data_rcu(devcom, &pos)) + +#endif /* __LIB_MLX5_DEVCOM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index d3d628b862f3..69a75459775d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -104,6 +104,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif -int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); +int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index db9df9798ffa..a80ecb672f33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -178,7 +178,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, if (!mlx5_chains_ignore_flow_level_supported(chains) || (chain == 0 && prio == 1 && level == 0)) { ft_attr.level = chains->fs_base_level; - ft_attr.prio = chains->fs_base_prio; + ft_attr.prio = chains->fs_base_prio + prio - 1; ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? mlx5_get_fdb_sub_ns(chains->dev, chain) : mlx5_get_flow_namespace(chains->dev, chains->ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c index 4047629a876b..30564d9b00e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c @@ -40,7 +40,7 @@ struct mlx5_hv_vhca_agent { struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev) { - struct mlx5_hv_vhca *hv_vhca = NULL; + struct mlx5_hv_vhca *hv_vhca; hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL); if (!hv_vhca) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c new file mode 100644 index 000000000000..4a078113e292 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c @@ -0,0 +1,2411 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
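
The cursor-based peer walk above replaces the old integer index; consumed through the devcom.h macros it stays a plain loop (sketch only, with devcom being the handle returned by mlx5_devcom_register_component()):

	struct mlx5_devcom_comp_dev *pos;
	void *data;

	if (mlx5_devcom_for_each_peer_begin(devcom)) {
		mlx5_devcom_for_each_peer_entry(devcom, data, pos) {
			/* 'data' is each peer's registered private data */
		}
		mlx5_devcom_for_each_peer_end(devcom);
	}
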
*/ + +#include <net/macsec.h> +#include <linux/mlx5/qp.h> +#include <linux/if_vlan.h> +#include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/macsec.h> +#include "fs_core.h" +#include "lib/macsec_fs.h" +#include "mlx5_core.h" + +/* MACsec TX flow steering */ +#define CRYPTO_NUM_MAXSEC_FTE BIT(15) +#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1 + +#define TX_CRYPTO_TABLE_LEVEL 0 +#define TX_CRYPTO_TABLE_NUM_GROUPS 3 +#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1 +#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \ + CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE)) +#define TX_CHECK_TABLE_LEVEL 1 +#define TX_CHECK_TABLE_NUM_FTE 2 +#define RX_CRYPTO_TABLE_LEVEL 0 +#define RX_CHECK_TABLE_LEVEL 1 +#define RX_ROCE_TABLE_LEVEL 2 +#define RX_CHECK_TABLE_NUM_FTE 3 +#define RX_ROCE_TABLE_NUM_FTE 2 +#define RX_CRYPTO_TABLE_NUM_GROUPS 3 +#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \ + ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2) +#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE) +#define RX_NUM_OF_RULES_PER_SA 2 + +#define RDMA_RX_ROCE_IP_TABLE_LEVEL 0 +#define RDMA_RX_ROCE_MACSEC_OP_TABLE_LEVEL 1 + +#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */ +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23 +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) +#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8 +#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN) + +/* MACsec RX flow steering */ +#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E + +/* MACsec fs_id handling for steering */ +#define macsec_fs_set_tx_fs_id(fs_id) (MLX5_ETH_WQE_FT_META_MACSEC | (fs_id) << 2) +#define macsec_fs_set_rx_fs_id(fs_id) ((fs_id) | BIT(30)) + +struct mlx5_sectag_header { + __be16 ethertype; + u8 tci_an; + u8 sl; + u32 pn; + u8 sci[MACSEC_SCI_LEN]; /* optional */ +} __packed; + +struct mlx5_roce_macsec_tx_rule { + u32 fs_id; + u16 gid_idx; + struct list_head entry; + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *meta_modhdr; +}; + +struct mlx5_macsec_tx_rule { + struct mlx5_flow_handle *rule; + struct mlx5_pkt_reformat *pkt_reformat; + u32 fs_id; +}; + +struct mlx5_macsec_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5_macsec_tables { + struct mlx5_macsec_flow_table ft_crypto; + struct mlx5_flow_handle *crypto_miss_rule; + + struct mlx5_flow_table *ft_check; + struct mlx5_flow_group *ft_check_group; + struct mlx5_fc *check_miss_rule_counter; + struct mlx5_flow_handle *check_miss_rule; + struct mlx5_fc *check_rule_counter; + + u32 refcnt; +}; + +struct mlx5_fs_id { + u32 id; + refcount_t refcnt; + sci_t sci; + struct rhash_head hash; +}; + +struct mlx5_macsec_device { + struct list_head macsec_devices_list_entry; + void *macdev; + struct xarray tx_id_xa; + struct xarray rx_id_xa; +}; + +struct mlx5_macsec_tx { + struct mlx5_flow_handle *crypto_mke_rule; + struct mlx5_flow_handle *check_rule; + + struct ida tx_halloc; + + struct mlx5_macsec_tables tables; + + struct mlx5_flow_table *ft_rdma_tx; +}; + +struct mlx5_roce_macsec_rx_rule { + u32 fs_id; + u16 gid_idx; + struct mlx5_flow_handle *op; + struct mlx5_flow_handle *ip; + struct list_head entry; +}; + +struct mlx5_macsec_rx_rule { 
+ struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA]; + struct mlx5_modify_hdr *meta_modhdr; +}; + +struct mlx5_macsec_miss { + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; +}; + +struct mlx5_macsec_rx_roce { + /* Flow table/rules in NIC domain, to check if it's a RoCE packet */ + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *copy_modify_hdr; + struct mlx5_macsec_miss nic_miss; + + /* Flow table/rule in RDMA domain, to check dgid */ + struct mlx5_flow_table *ft_ip_check; + struct mlx5_flow_table *ft_macsec_op_check; + struct mlx5_macsec_miss miss; +}; + +struct mlx5_macsec_rx { + struct mlx5_flow_handle *check_rule[2]; + struct mlx5_pkt_reformat *check_rule_pkt_reformat[2]; + + struct mlx5_macsec_tables tables; + struct mlx5_macsec_rx_roce roce; +}; + +union mlx5_macsec_rule { + struct mlx5_macsec_tx_rule tx_rule; + struct mlx5_macsec_rx_rule rx_rule; +}; + +static const struct rhashtable_params rhash_sci = { + .key_len = sizeof_field(struct mlx5_fs_id, sci), + .key_offset = offsetof(struct mlx5_fs_id, sci), + .head_offset = offsetof(struct mlx5_fs_id, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fs_id = { + .key_len = sizeof_field(struct mlx5_fs_id, id), + .key_offset = offsetof(struct mlx5_fs_id, id), + .head_offset = offsetof(struct mlx5_fs_id, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +struct mlx5_macsec_fs { + struct mlx5_core_dev *mdev; + struct mlx5_macsec_tx *tx_fs; + struct mlx5_macsec_rx *rx_fs; + + /* Stats manage */ + struct mlx5_macsec_stats stats; + + /* Tx sci -> fs id mapping handling */ + struct rhashtable sci_hash; /* sci -> mlx5_fs_id */ + + /* RX fs_id -> mlx5_fs_id mapping handling */ + struct rhashtable fs_id_hash; /* fs_id -> mlx5_fs_id */ + + /* TX & RX fs_id lists per macsec device */ + struct list_head macsec_devices_list; +}; + +static void macsec_fs_destroy_groups(struct mlx5_macsec_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + +static void macsec_fs_destroy_flow_table(struct mlx5_macsec_flow_table *ft) +{ + macsec_fs_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void macsec_fs_tx_destroy(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_macsec_tables *tx_tables; + + if (mlx5_is_macsec_roce_supported(macsec_fs->mdev)) + mlx5_destroy_flow_table(tx_fs->ft_rdma_tx); + + tx_tables = &tx_fs->tables; + + /* Tx check table */ + if (tx_fs->check_rule) { + mlx5_del_flow_rules(tx_fs->check_rule); + tx_fs->check_rule = NULL; + } + + if (tx_tables->check_miss_rule) { + mlx5_del_flow_rules(tx_tables->check_miss_rule); + tx_tables->check_miss_rule = NULL; + } + + if (tx_tables->ft_check_group) { + mlx5_destroy_flow_group(tx_tables->ft_check_group); + tx_tables->ft_check_group = NULL; + } + + if (tx_tables->ft_check) { + mlx5_destroy_flow_table(tx_tables->ft_check); + tx_tables->ft_check = NULL; + } + + /* Tx crypto table */ + if (tx_fs->crypto_mke_rule) { + mlx5_del_flow_rules(tx_fs->crypto_mke_rule); + tx_fs->crypto_mke_rule = NULL; + } + + if (tx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(tx_tables->crypto_miss_rule); + tx_tables->crypto_miss_rule = NULL; + } + + macsec_fs_destroy_flow_table(&tx_tables->ft_crypto); +} + +static int 
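
The two rhashtables parameterised above give O(1) translation in both directions at runtime: rhash_sci keys struct mlx5_fs_id by SCI for the Tx side, rhash_fs_id keys it by the 32-bit fs_id for the Rx side. A Tx-side lookup might look like this (sketch; my_sci_to_fs_id is a hypothetical helper, but rhashtable_lookup() must be called under the RCU read lock exactly as here):

static u32 my_sci_to_fs_id(struct mlx5_macsec_fs *macsec_fs, sci_t sci)
{
	struct mlx5_fs_id *id;
	u32 fs_id = 0;

	rcu_read_lock();
	id = rhashtable_lookup(&macsec_fs->sci_hash, &sci, rhash_sci);
	if (id)
		fs_id = id->id;
	rcu_read_unlock();

	return fs_id;
}
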
macsec_fs_tx_create_crypto_table_groups(struct mlx5_macsec_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + + if (!in) { + kfree(ft->g); + ft->g = NULL; + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow Group for MKE match */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for SA rules */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC_MASK); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_table + *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags, + int level, int max_fte) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb = NULL; + + /* reserve entry for the match all miss group and rule */ + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.prio = 0; + ft_attr.flags = flags; + ft_attr.level = level; + ft_attr.max_fte = max_fte; + + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + + return fdb; +} + +enum { + RDMA_TX_MACSEC_LEVEL = 0, +}; + +static int macsec_fs_tx_roce_create(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int err; + + if (!mlx5_is_macsec_roce_supported(mdev)) { + mlx5_core_dbg(mdev, "Failed to init RoCE MACsec, capabilities not supported\n"); + return 0; + } + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC); + if (!ns) + return -ENOMEM; + + /* Tx RoCE crypto table */ + ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_TX_MACSEC_LEVEL, CRYPTO_NUM_MAXSEC_FTE); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Failed to create MACsec RoCE Tx crypto table err(%d)\n", err); + return err; + } + tx_fs->ft_rdma_tx = ft; + + return 0; +} + +static int macsec_fs_tx_create(struct mlx5_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = 
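
With the sizes defined at the top of the file, the three groups created in macsec_fs_tx_create_crypto_table_groups() partition the crypto table's CRYPTO_NUM_MAXSEC_FTE = 2^15 = 32768 entries exactly:

/*
 *   MKE group     (ethertype match)       flow indexes [0, 0]
 *   SA group      (metadata_reg_a match)  flow indexes [1, 32766]
 *   default group (no match criteria)     flow indexes [32767, 32767]
 *
 * since TX_CRYPTO_TABLE_SA_GROUP_SIZE = 32768 - (1 + 1) = 32766.
 */
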
macsec_fs->mdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *tx_tables; + struct mlx5_flow_act flow_act = {}; + struct mlx5_macsec_flow_table *ft_crypto; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_spec; + } + + tx_tables = &tx_fs->tables; + ft_crypto = &tx_tables->ft_crypto; + + /* Tx crypto table */ + ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.level = TX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Failed to create MACsec Tx crypto table err(%d)\n", err); + goto out_flow_group; + } + ft_crypto->t = flow_table; + + /* Tx crypto table groups */ + err = macsec_fs_tx_create_crypto_table_groups(ft_crypto); + if (err) { + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + + /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec TX MKE rule, err=%d\n", err); + goto err; + } + tx_fs->crypto_mke_rule = rule; + + /* Tx crypto table Default miss rule */ + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec Tx table default miss rule %d\n", err); + goto err; + } + tx_tables->crypto_miss_rule = rule; + + /* Tx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL, + TX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Fail to create MACsec TX check table, err(%d)\n", err); + goto err; + } + tx_tables->ft_check = flow_table; + + /* Tx check table Default miss group/rule */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + tx_tables->ft_check_group = flow_group; + + /* Tx check table default drop rule */ + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = 
mlx5_fc_id(tx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to added MACsec tx check drop rule, err(%d)\n", err); + goto err; + } + tx_tables->check_miss_rule = rule; + + /* Tx check table rule */ + memset(spec, 0, sizeof(struct mlx5_flow_spec)); + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter); + rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec check rule, err=%d\n", err); + goto err; + } + tx_fs->check_rule = rule; + + err = macsec_fs_tx_roce_create(macsec_fs); + if (err) + goto err; + + kvfree(flow_group_in); + kvfree(spec); + return 0; + +err: + macsec_fs_tx_destroy(macsec_fs); +out_flow_group: + kvfree(flow_group_in); +out_spec: + kvfree(spec); + return err; +} + +static int macsec_fs_tx_ft_get(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_macsec_tables *tx_tables; + int err = 0; + + tx_tables = &tx_fs->tables; + if (tx_tables->refcnt) + goto out; + + err = macsec_fs_tx_create(macsec_fs); + if (err) + return err; + +out: + tx_tables->refcnt++; + return err; +} + +static void macsec_fs_tx_ft_put(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; + + if (--tx_tables->refcnt) + return; + + macsec_fs_tx_destroy(macsec_fs); +} + +static int macsec_fs_tx_setup_fte(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + u32 macsec_obj_id, + u32 *fs_id) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + int err = 0; + u32 id; + + err = ida_alloc_range(&tx_fs->tx_halloc, 1, + MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES, + GFP_KERNEL); + if (err < 0) + return err; + + id = err; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + /* Metadata match */ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC_MASK); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, + macsec_fs_set_tx_fs_id(id)); + + *fs_id = id; + flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; + flow_act->crypto.obj_id = macsec_obj_id; + + mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id); + return 0; +} + +static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx, + char *reformatbf, + size_t *reformat_size) +{ + const struct macsec_secy *secy = ctx->secy; + bool sci_present = macsec_send_sci(secy); + struct mlx5_sectag_header sectag = {}; + const struct macsec_tx_sc *tx_sc; + + tx_sc = &secy->tx_sc; + sectag.ethertype = htons(ETH_P_MACSEC); + + if (sci_present) { + sectag.tci_an |= MACSEC_TCI_SC; + memcpy(§ag.sci, &secy->sci, + 
sizeof(sectag.sci)); + } else { + if (tx_sc->end_station) + sectag.tci_an |= MACSEC_TCI_ES; + if (tx_sc->scb) + sectag.tci_an |= MACSEC_TCI_SCB; + } + + /* With GCM, C/E clear for !encrypt, both set for encrypt */ + if (tx_sc->encrypt) + sectag.tci_an |= MACSEC_TCI_CONFID; + else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) + sectag.tci_an |= MACSEC_TCI_C; + + sectag.tci_an |= tx_sc->encoding_sa; + + *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0); + + memcpy(reformatbf, §ag, *reformat_size); +} + +static bool macsec_fs_is_macsec_device_empty(struct mlx5_macsec_device *macsec_device) +{ + if (xa_empty(&macsec_device->tx_id_xa) && + xa_empty(&macsec_device->rx_id_xa)) + return true; + + return false; +} + +static void macsec_fs_id_del(struct list_head *macsec_devices_list, u32 fs_id, + void *macdev, struct rhashtable *hash_table, bool is_tx) +{ + const struct rhashtable_params *rhash = (is_tx) ? &rhash_sci : &rhash_fs_id; + struct mlx5_macsec_device *iter, *macsec_device = NULL; + struct mlx5_fs_id *fs_id_found; + struct xarray *fs_id_xa; + + list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) { + if (iter->macdev == macdev) { + macsec_device = iter; + break; + } + } + WARN_ON(!macsec_device); + + fs_id_xa = (is_tx) ? &macsec_device->tx_id_xa : + &macsec_device->rx_id_xa; + xa_lock(fs_id_xa); + fs_id_found = xa_load(fs_id_xa, fs_id); + WARN_ON(!fs_id_found); + + if (!refcount_dec_and_test(&fs_id_found->refcnt)) { + xa_unlock(fs_id_xa); + return; + } + + if (fs_id_found->id) { + /* Make sure ongoing datapath readers sees a valid SA */ + rhashtable_remove_fast(hash_table, &fs_id_found->hash, *rhash); + fs_id_found->id = 0; + } + xa_unlock(fs_id_xa); + + xa_erase(fs_id_xa, fs_id); + + kfree(fs_id_found); + + if (macsec_fs_is_macsec_device_empty(macsec_device)) { + list_del(&macsec_device->macsec_devices_list_entry); + kfree(macsec_device); + } +} + +static int macsec_fs_id_add(struct list_head *macsec_devices_list, u32 fs_id, + void *macdev, struct rhashtable *hash_table, sci_t sci, + bool is_tx) +{ + const struct rhashtable_params *rhash = (is_tx) ? &rhash_sci : &rhash_fs_id; + struct mlx5_macsec_device *iter, *macsec_device = NULL; + struct mlx5_fs_id *fs_id_iter; + struct xarray *fs_id_xa; + int err; + + if (!is_tx) { + rcu_read_lock(); + fs_id_iter = rhashtable_lookup(hash_table, &fs_id, rhash_fs_id); + if (fs_id_iter) { + refcount_inc(&fs_id_iter->refcnt); + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + } + + fs_id_iter = kzalloc(sizeof(*fs_id_iter), GFP_KERNEL); + if (!fs_id_iter) + return -ENOMEM; + + list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) { + if (iter->macdev == macdev) { + macsec_device = iter; + break; + } + } + + if (!macsec_device) { /* first time adding a SA to that device */ + macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL); + if (!macsec_device) { + err = -ENOMEM; + goto err_alloc_dev; + } + macsec_device->macdev = macdev; + xa_init(&macsec_device->tx_id_xa); + xa_init(&macsec_device->rx_id_xa); + list_add(&macsec_device->macsec_devices_list_entry, macsec_devices_list); + } + + fs_id_xa = (is_tx) ? 
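
For a secy that sends the SCI, macsec_fs_tx_create_sectag_header() above fills the reformat template as follows (worked example; the encrypt and encoding_sa values are illustrative, and leaving sl/pn zero in the template assumes the device supplies the per-packet PN, which this file does not itself show):

/*
 *   ethertype = htons(ETH_P_MACSEC)
 *   tci_an    = MACSEC_TCI_SC | MACSEC_TCI_CONFID | encoding_sa
 *   sl, pn    = 0 in the template
 *   sci       = secy->sci (8 bytes)
 *
 *   reformat_size = MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN = 8 + 8 = 16
 *                 = MLX5_SECTAG_HEADER_SIZE_WITH_SCI
 *
 * Without the SCI, only the first MLX5_MACSEC_TAG_LEN = 8 bytes are
 * copied and the ES/SCB TCI bits are taken from the tx_sc instead.
 */
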
&macsec_device->tx_id_xa : + &macsec_device->rx_id_xa; + fs_id_iter->id = fs_id; + refcount_set(&fs_id_iter->refcnt, 1); + fs_id_iter->sci = sci; + err = xa_err(xa_store(fs_id_xa, fs_id, fs_id_iter, GFP_KERNEL)); + if (err) + goto err_store_id; + + err = rhashtable_insert_fast(hash_table, &fs_id_iter->hash, *rhash); + if (err) + goto err_hash_insert; + + return 0; + +err_hash_insert: + xa_erase(fs_id_xa, fs_id); +err_store_id: + if (macsec_fs_is_macsec_device_empty(macsec_device)) { + list_del(&macsec_device->macsec_devices_list_entry); + kfree(macsec_device); + } +err_alloc_dev: + kfree(fs_id_iter); + return err; +} + +static void macsec_fs_tx_del_rule(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_tx_rule *tx_rule, + void *macdev) +{ + macsec_fs_id_del(&macsec_fs->macsec_devices_list, tx_rule->fs_id, macdev, + &macsec_fs->sci_hash, true); + + if (tx_rule->rule) { + mlx5_del_flow_rules(tx_rule->rule); + tx_rule->rule = NULL; + } + + if (tx_rule->pkt_reformat) { + mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat); + tx_rule->pkt_reformat = NULL; + } + + if (tx_rule->fs_id) { + ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id); + tx_rule->fs_id = 0; + } + + kfree(tx_rule); + + macsec_fs_tx_ft_put(macsec_fs); +} + +#define MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES 1 + +static union mlx5_macsec_rule * +macsec_fs_tx_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, u32 *fs_id) +{ + char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN]; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + union mlx5_macsec_rule *macsec_rule = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *tx_tables; + struct mlx5_macsec_tx_rule *tx_rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + size_t reformat_size; + int err = 0; + + tx_tables = &tx_fs->tables; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + err = macsec_fs_tx_ft_get(macsec_fs); + if (err) + goto out_spec; + + macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); + if (!macsec_rule) { + macsec_fs_tx_ft_put(macsec_fs); + goto out_spec; + } + + tx_rule = &macsec_rule->tx_rule; + + /* Tx crypto table crypto rule */ + macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size); + + reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC; + reformat_params.size = reformat_size; + reformat_params.data = reformatbf; + + if (is_vlan_dev(macsec_ctx->netdev)) + reformat_params.param_0 = MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES; + + flow_act.pkt_reformat = mlx5_packet_reformat_alloc(mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); + if (IS_ERR(flow_act.pkt_reformat)) { + err = PTR_ERR(flow_act.pkt_reformat); + mlx5_core_err(mdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err); + goto err; + } + tx_rule->pkt_reformat = flow_act.pkt_reformat; + + err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, fs_id); + if (err) { + mlx5_core_err(mdev, + "Failed to add packet reformat for MACsec TX crypto rule, err=%d\n", + err); + goto err; + } + + tx_rule->fs_id = *fs_id; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + 
dest.ft = tx_tables->ft_check; + rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec TX crypto rule, err=%d\n", err); + goto err; + } + tx_rule->rule = rule; + + err = macsec_fs_id_add(&macsec_fs->macsec_devices_list, *fs_id, macsec_ctx->secy->netdev, + &macsec_fs->sci_hash, attrs->sci, true); + if (err) { + mlx5_core_err(mdev, "Failed to save fs_id, err=%d\n", err); + goto err; + } + + goto out_spec; + +err: + macsec_fs_tx_del_rule(macsec_fs, tx_rule, macsec_ctx->secy->netdev); + macsec_rule = NULL; +out_spec: + kvfree(spec); + + return macsec_rule; +} + +static void macsec_fs_tx_cleanup(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *tx_tables; + + if (!tx_fs) + return; + + tx_tables = &tx_fs->tables; + if (tx_tables->refcnt) { + mlx5_core_err(mdev, + "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n", + tx_tables->refcnt); + return; + } + + ida_destroy(&tx_fs->tx_halloc); + + if (tx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter); + tx_tables->check_miss_rule_counter = NULL; + } + + if (tx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + } + + kfree(tx_fs); + macsec_fs->tx_fs = NULL; +} + +static int macsec_fs_tx_init(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *tx_tables; + struct mlx5_macsec_tx *tx_fs; + struct mlx5_fc *flow_counter; + int err; + + tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL); + if (!tx_fs) + return -ENOMEM; + + tx_tables = &tx_fs->tables; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Tx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + tx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Tx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + tx_tables->check_miss_rule_counter = flow_counter; + + ida_init(&tx_fs->tx_halloc); + INIT_LIST_HEAD(&macsec_fs->macsec_devices_list); + + macsec_fs->tx_fs = tx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(tx_fs); + macsec_fs->tx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_roce_miss_destroy(struct mlx5_macsec_miss *miss) +{ + mlx5_del_flow_rules(miss->rule); + mlx5_destroy_flow_group(miss->g); +} + +static void macsec_fs_rdma_rx_destroy(struct mlx5_macsec_rx_roce *roce, struct mlx5_core_dev *mdev) +{ + if (!mlx5_is_macsec_roce_supported(mdev)) + return; + + mlx5_del_flow_rules(roce->nic_miss.rule); + mlx5_del_flow_rules(roce->rule); + mlx5_modify_header_dealloc(mdev, roce->copy_modify_hdr); + mlx5_destroy_flow_group(roce->nic_miss.g); + mlx5_destroy_flow_group(roce->g); + mlx5_destroy_flow_table(roce->ft); + + macsec_fs_rx_roce_miss_destroy(&roce->miss); + mlx5_destroy_flow_table(roce->ft_macsec_op_check); + mlx5_destroy_flow_table(roce->ft_ip_check); +} + +static void macsec_fs_rx_destroy(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_rx *rx_fs = 
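
Putting the Tx pieces together, the egress steering built by macsec_fs_tx_create() resolves as follows (summary; that the driver writes the matching metadata into the send WQE is an inference from the MLX5_ETH_WQE_FT_META_MACSEC naming, not shown in this file):

/*
 * EGRESS_MACSEC ft_crypto (level 0):
 *   ethertype == ETH_P_PAE          -> ALLOW (MKE is never offloaded)
 *   metadata_reg_a matches
 *   macsec_fs_set_tx_fs_id(fs_id)   -> CRYPTO_ENCRYPT + add-SecTAG
 *                                      reformat, FWD to ft_check
 *   miss                            -> ALLOW (non-MACsec traffic)
 *
 * ft_check (level 1):
 *   metadata_reg_c_4 == 0           -> ALLOW + count (encrypt OK)
 *   miss                            -> DROP + count
 *
 * fs_id itself is allocated from tx_halloc in macsec_fs_tx_setup_fte()
 * and encoded as MLX5_ETH_WQE_FT_META_MACSEC | (fs_id << 2).
 */
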
macsec_fs->rx_fs; + struct mlx5_macsec_tables *rx_tables; + int i; + + /* Rx check table */ + for (i = 1; i >= 0; --i) { + if (rx_fs->check_rule[i]) { + mlx5_del_flow_rules(rx_fs->check_rule[i]); + rx_fs->check_rule[i] = NULL; + } + + if (rx_fs->check_rule_pkt_reformat[i]) { + mlx5_packet_reformat_dealloc(macsec_fs->mdev, + rx_fs->check_rule_pkt_reformat[i]); + rx_fs->check_rule_pkt_reformat[i] = NULL; + } + } + + rx_tables = &rx_fs->tables; + + if (rx_tables->check_miss_rule) { + mlx5_del_flow_rules(rx_tables->check_miss_rule); + rx_tables->check_miss_rule = NULL; + } + + if (rx_tables->ft_check_group) { + mlx5_destroy_flow_group(rx_tables->ft_check_group); + rx_tables->ft_check_group = NULL; + } + + if (rx_tables->ft_check) { + mlx5_destroy_flow_table(rx_tables->ft_check); + rx_tables->ft_check = NULL; + } + + /* Rx crypto table */ + if (rx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(rx_tables->crypto_miss_rule); + rx_tables->crypto_miss_rule = NULL; + } + + macsec_fs_destroy_flow_table(&rx_tables->ft_crypto); + + macsec_fs_rdma_rx_destroy(&macsec_fs->rx_fs->roce, macsec_fs->mdev); +} + +static int macsec_fs_rx_create_crypto_table_groups(struct mlx5_macsec_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow group for SA rule with SCI */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow group for SA rule without SCI */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + 
ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int macsec_fs_rx_create_check_decap_rule(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec, + int reformat_param_size) +{ + int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 0 : 1; + u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI]; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_destination roce_dest[2]; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_flow_handle *rule; + int err = 0, dstn = 0; + + rx_tables = &rx_fs->tables; + + /* Rx check table decap 16B rule */ + memset(dest, 0, sizeof(*dest)); + memset(flow_act, 0, sizeof(*flow_act)); + memset(spec, 0, sizeof(*spec)); + + reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC; + reformat_params.size = reformat_param_size; + reformat_params.data = mlx5_reformat_buf; + flow_act->pkt_reformat = mlx5_packet_reformat_alloc(mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (IS_ERR(flow_act->pkt_reformat)) { + err = PTR_ERR(flow_act->pkt_reformat); + mlx5_core_err(mdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err); + return err; + } + rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + /* MACsec syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0); + /* ASO return reg syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + /* Sectag TCI SC present bit*/ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + flow_act->flags = FLOW_ACT_NO_APPEND; + + if (rx_fs->roce.ft) { + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + roce_dest[dstn].ft = rx_fs->roce.ft; + dstn++; + } else { + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + } + + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + roce_dest[dstn].counter_id = mlx5_fc_id(rx_tables->check_rule_counter); + rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, roce_dest, dstn + 1); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec Rx check rule, err=%d\n", err); + return err; + } + + rx_fs->check_rule[rule_index] = rule; + + return 0; +} + +static int macsec_fs_rx_roce_miss_create(struct mlx5_core_dev *mdev, + struct mlx5_macsec_rx_roce *roce) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_group *flow_group; + struct 
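
macsec_fs_rx_create_check_decap_rule() above is parameterised by the SecTAG size and presumably installed once per size (the callers fall outside this hunk); in short:

/*
 * check_rule[0]: TCI SC bit set   -> DEL_MACSEC reformat strips
 *                                    MLX5_SECTAG_HEADER_SIZE_WITH_SCI
 *                                    = 16 bytes (tag + SCI)
 * check_rule[1]: TCI SC bit clear -> strips 8 bytes (tag only)
 *
 * Both also require macsec_syndrome == 0 and the ASO return register
 * metadata_reg_c_4 == 0 (decrypt succeeded), count the hit, and
 * forward into rx_fs->roce.ft when RoCE MACsec is supported or to the
 * next kernel prio otherwise.
 */
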
mlx5_flow_handle *rule; + u32 *flow_group_in; + int err; + + flow_group_in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + /* IP check ft has no miss rule since we use default miss action which is go to next PRIO */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + roce->ft_macsec_op_check->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + roce->ft_macsec_op_check->max_fte - 1); + flow_group = mlx5_create_flow_group(roce->ft_macsec_op_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + mlx5_core_err(mdev, + "Failed to create miss flow group for MACsec RoCE operation check table err(%d)\n", + err); + goto err_macsec_op_miss_group; + } + roce->miss.g = flow_group; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + rule = mlx5_add_flow_rules(roce->ft_macsec_op_check, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add miss rule to MACsec RoCE operation check table err(%d)\n", + err); + goto err_macsec_op_rule; + } + roce->miss.rule = rule; + + kvfree(flow_group_in); + return 0; + +err_macsec_op_rule: + mlx5_destroy_flow_group(roce->miss.g); +err_macsec_op_miss_group: + kvfree(flow_group_in); + return err; +} + +#define MLX5_RX_ROCE_GROUP_SIZE BIT(0) + +static int macsec_fs_rx_roce_jump_to_rdma_groups_create(struct mlx5_core_dev *mdev, + struct mlx5_macsec_rx_roce *roce) +{ + struct mlx5_flow_group *g; + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(roce->ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Failed to create main flow group for MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_udp_group; + } + roce->g = g; + + memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in)); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(roce->ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Failed to create miss flow group for MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_udp_miss_group; + } + roce->nic_miss.g = g; + + kvfree(in); + return 0; + +err_udp_miss_group: + mlx5_destroy_flow_group(roce->g); +err_udp_group: + kvfree(in); + return err; +} + +static int macsec_fs_rx_roce_jump_to_rdma_rules_create(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_rx_roce *roce) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_destination dst = {}; + struct mlx5_modify_hdr *modify_hdr; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, 
spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, ROCE_V2_UDP_DPORT); + + MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(copy_action_in, action, src_offset, 0); + MLX5_SET(copy_action_in, action, length, 32); + MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_5); + MLX5_SET(copy_action_in, action, dst_offset, 0); + + modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, + 1, action); + + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, + "Failed to alloc macsec copy modify_header_id err(%d)\n", err); + goto err_alloc_hdr; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.modify_hdr = modify_hdr; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->ft_ip_check; + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add rule to MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_add_rule; + } + roce->rule = rule; + roce->copy_modify_hdr = modify_hdr; + + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add miss rule to MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_add_rule2; + } + roce->nic_miss.rule = rule; + + kvfree(spec); + return 0; + +err_add_rule2: + mlx5_del_flow_rules(roce->rule); +err_add_rule: + mlx5_modify_header_dealloc(macsec_fs->mdev, modify_hdr); +err_alloc_hdr: + kvfree(spec); + return err; +} + +static int macsec_fs_rx_roce_jump_to_rdma_create(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_rx_roce *roce) +{ + int err; + + err = macsec_fs_rx_roce_jump_to_rdma_groups_create(macsec_fs->mdev, roce); + if (err) + return err; + + err = macsec_fs_rx_roce_jump_to_rdma_rules_create(macsec_fs, roce); + if (err) + goto err; + + return 0; +err: + mlx5_destroy_flow_group(roce->nic_miss.g); + mlx5_destroy_flow_group(roce->g); + return err; +} + +static int macsec_fs_rx_roce_create(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int err = 0; + + if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev)) { + mlx5_core_dbg(mdev, "Failed to init RoCE MACsec, capabilities not supported\n"); + return 0; + } + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC); + if (!ns) + return -ENOMEM; + + ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_RX_ROCE_IP_TABLE_LEVEL, + CRYPTO_NUM_MAXSEC_FTE); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, + "Failed to create MACsec IP check RoCE table err(%d)\n", err); + return err; + } + rx_fs->roce.ft_ip_check = ft; + + ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_RX_ROCE_MACSEC_OP_TABLE_LEVEL, + CRYPTO_NUM_MAXSEC_FTE); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, + "Failed 
to create MACsec operation check RoCE table err(%d)\n", + err); + goto err_macsec_op; + } + rx_fs->roce.ft_macsec_op_check = ft; + + err = macsec_fs_rx_roce_miss_create(mdev, &rx_fs->roce); + if (err) + goto err_miss_create; + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (!ns) { + err = -EOPNOTSUPP; + goto err_ns; + } + + ft_attr.level = RX_ROCE_TABLE_LEVEL; + ft_attr.max_fte = RX_ROCE_TABLE_NUM_FTE; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, + "Failed to create MACsec jump to RX RoCE, NIC table err(%d)\n", err); + goto err_ns; + } + rx_fs->roce.ft = ft; + + err = macsec_fs_rx_roce_jump_to_rdma_create(macsec_fs, &rx_fs->roce); + if (err) + goto err_udp_ft; + + return 0; + +err_udp_ft: + mlx5_destroy_flow_table(rx_fs->roce.ft); +err_ns: + macsec_fs_rx_roce_miss_destroy(&rx_fs->roce.miss); +err_miss_create: + mlx5_destroy_flow_table(rx_fs->roce.ft_macsec_op_check); +err_macsec_op: + mlx5_destroy_flow_table(rx_fs->roce.ft_ip_check); + return err; +} + +static int macsec_fs_rx_create(struct mlx5_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_flow_table *ft_crypto; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto free_spec; + } + + rx_tables = &rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + err = macsec_fs_rx_roce_create(macsec_fs); + if (err) + goto out_flow_group; + + /* Rx crypto table */ + ft_attr.level = RX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Failed to create MACsec Rx crypto table err(%d)\n", err); + goto err; + } + ft_crypto->t = flow_table; + + /* Rx crypto table groups */ + err = macsec_fs_rx_create_crypto_table_groups(ft_crypto); + if (err) { + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Rx crypto table err(%d)\n", + err); + goto err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add MACsec Rx crypto table default miss rule %d\n", + err); + goto err; + } + rx_tables->crypto_miss_rule = rule; + + /* Rx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT, + RX_CHECK_TABLE_LEVEL, + RX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Failed to create MACsec RX check table, err(%d)\n", err); + goto err; + } + rx_tables->ft_check = flow_table; + + /* Rx check table Default miss group/rule */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte
- 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Rx check table err(%d)\n", + err); + goto err; + } + rx_tables->ft_check_group = flow_group; + + /* Rx check table default drop rule */ + memset(&flow_act, 0, sizeof(flow_act)); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec Rx check drop rule, err(%d)\n", err); + goto err; + } + rx_tables->check_miss_rule = rule; + + /* Rx check table decap rules */ + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITH_SCI); + if (err) + goto err; + + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI); + if (err) + goto err; + + goto out_flow_group; + +err: + macsec_fs_rx_destroy(macsec_fs); +out_flow_group: + kvfree(flow_group_in); +free_spec: + kvfree(spec); + return err; +} + +static int macsec_fs_rx_ft_get(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + int err = 0; + + if (rx_tables->refcnt) + goto out; + + err = macsec_fs_rx_create(macsec_fs); + if (err) + return err; + +out: + rx_tables->refcnt++; + return err; +} + +static void macsec_fs_rx_ft_put(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + + if (--rx_tables->refcnt) + return; + + macsec_fs_rx_destroy(macsec_fs); +} + +static void macsec_fs_rx_del_rule(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_rx_rule *rx_rule, + void *macdev, u32 fs_id) +{ + int i; + + macsec_fs_id_del(&macsec_fs->macsec_devices_list, fs_id, macdev, + &macsec_fs->fs_id_hash, false); + + for (i = 0; i < RX_NUM_OF_RULES_PER_SA; ++i) { + if (rx_rule->rule[i]) { + mlx5_del_flow_rules(rx_rule->rule[i]); + rx_rule->rule[i] = NULL; + } + } + + if (rx_rule->meta_modhdr) { + mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr); + rx_rule->meta_modhdr = NULL; + } + + kfree(rx_rule); + + macsec_fs_rx_ft_put(macsec_fs); +} + +static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_macsec_rule_attrs *attrs, + bool sci_present) +{ + u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num; + struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto; + __be32 *sci_p = (__be32 *)(&attrs->sci); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* MACsec ethertype */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + /* Sectag AN + TCI SC present bit */ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + tci_an << 
MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (sci_present) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_2); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2, + be32_to_cpu(sci_p[0])); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_3); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3, + be32_to_cpu(sci_p[1])); + } else { + /* When SCI isn't present in the Sectag, need to match the source */ + /* MAC address only if the SCI contains the default MACsec PORT */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16), + sci_p, ETH_ALEN); + } + + crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; + crypto_params->obj_id = attrs->macsec_obj_id; +} + +static union mlx5_macsec_rule * +macsec_fs_rx_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 fs_id) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + union mlx5_macsec_rule *macsec_rule = NULL; + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5_macsec_flow_table *ft_crypto; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_macsec_rx_rule *rx_rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + err = macsec_fs_rx_ft_get(macsec_fs); + if (err) + goto out_spec; + + macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); + if (!macsec_rule) { + macsec_fs_rx_ft_put(macsec_fs); + goto out_spec; + } + + rx_rule = &macsec_rule->rx_rule; + rx_tables = &rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + /* Set bit[31 - 30] macsec marker - 0x01 */ + /* Set bit[15-0] fs id */ + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(set_action_in, action, data, macsec_fs_set_rx_fs_id(fs_id)); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, "Failed to alloc MACsec set modify_header_id err=%d\n", err); + modify_hdr = NULL; + goto err; + } + rx_rule->meta_modhdr = modify_hdr; + + /* Rx crypto table with SCI rule */ + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[0] = rule; + + /* Rx crypto table without SCI rule */ + if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == 
ntohs(MACSEC_PORT_ES)) { + memset(spec, 0, sizeof(struct mlx5_flow_spec)); + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[1] = rule; + } + + err = macsec_fs_id_add(&macsec_fs->macsec_devices_list, fs_id, macsec_ctx->secy->netdev, + &macsec_fs->fs_id_hash, attrs->sci, false); + if (err) { + mlx5_core_err(mdev, "Failed to save fs_id, err=%d\n", err); + goto err; + } + + kvfree(spec); + return macsec_rule; + +err: + macsec_fs_rx_del_rule(macsec_fs, rx_rule, macsec_ctx->secy->netdev, fs_id); + macsec_rule = NULL; +out_spec: + kvfree(spec); + return macsec_rule; +} + +static int macsec_fs_rx_init(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_macsec_rx *rx_fs; + struct mlx5_fc *flow_counter; + int err; + + rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL); + if (!rx_fs) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Rx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + + rx_tables = &rx_fs->tables; + rx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Rx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + rx_tables->check_miss_rule_counter = flow_counter; + + macsec_fs->rx_fs = rx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(rx_fs); + macsec_fs->rx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_cleanup(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *rx_tables; + + if (!rx_fs) + return; + + rx_tables = &rx_fs->tables; + + if (rx_tables->refcnt) { + mlx5_core_err(mdev, + "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n", + rx_tables->refcnt); + return; + } + + if (rx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter); + rx_tables->check_miss_rule_counter = NULL; + } + + if (rx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + } + + kfree(rx_fs); + macsec_fs->rx_fs = NULL; +} + +static void set_ipaddr_spec_v4(struct sockaddr_in *in, struct mlx5_flow_spec *spec, bool is_dst_ip) +{ + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.ip_version, MLX5_FS_IPV4_VERSION); + + if (is_dst_ip) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 
+ &in->sin_addr.s_addr, 4); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &in->sin_addr.s_addr, 4); + } +} + +static void set_ipaddr_spec_v6(struct sockaddr_in6 *in6, struct mlx5_flow_spec *spec, + bool is_dst_ip) +{ + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.ip_version, MLX5_FS_IPV6_VERSION); + + if (is_dst_ip) { + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &in6->sin6_addr, 16); + } else { + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &in6->sin6_addr, 16); + } +} + +static void set_ipaddr_spec(const struct sockaddr *addr, + struct mlx5_flow_spec *spec, bool is_dst_ip) +{ + struct sockaddr_in6 *in6; + + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_version); + + if (addr->sa_family == AF_INET) { + struct sockaddr_in *in = (struct sockaddr_in *)addr; + + set_ipaddr_spec_v4(in, spec, is_dst_ip); + return; + } + + in6 = (struct sockaddr_in6 *)addr; + set_ipaddr_spec_v6(in6, spec, is_dst_ip); +} + +static void macsec_fs_del_roce_rule_rx(struct mlx5_roce_macsec_rx_rule *rx_rule) +{ + mlx5_del_flow_rules(rx_rule->op); + mlx5_del_flow_rules(rx_rule->ip); + list_del(&rx_rule->entry); + kfree(rx_rule); +} + +static void macsec_fs_del_roce_rules_rx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, + struct list_head *rx_rules_list) +{ + struct mlx5_roce_macsec_rx_rule *rx_rule, *next; + + if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev)) + return; + + list_for_each_entry_safe(rx_rule, next, rx_rules_list, entry) { + if (rx_rule->fs_id == fs_id) + macsec_fs_del_roce_rule_rx(rx_rule); + } +} + +static void macsec_fs_del_roce_rule_tx(struct mlx5_core_dev *mdev, + struct mlx5_roce_macsec_tx_rule *tx_rule) +{ + mlx5_del_flow_rules(tx_rule->rule); + mlx5_modify_header_dealloc(mdev, tx_rule->meta_modhdr); + list_del(&tx_rule->entry); + kfree(tx_rule); +} + +static void macsec_fs_del_roce_rules_tx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, + struct list_head *tx_rules_list) +{ + struct mlx5_roce_macsec_tx_rule *tx_rule, *next; + + if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev)) + return; + + list_for_each_entry_safe(tx_rule, next, tx_rules_list, entry) { + if (tx_rule->fs_id == fs_id) + macsec_fs_del_roce_rule_tx(macsec_fs->mdev, tx_rule); + } +} + +void mlx5_macsec_fs_get_stats_fill(struct mlx5_macsec_fs *macsec_fs, void *macsec_stats) +{ + struct mlx5_macsec_stats *stats = (struct mlx5_macsec_stats *)macsec_stats; + struct mlx5_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; + struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + + if (tx_tables->check_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_rule_counter, + &stats->macsec_tx_pkts, &stats->macsec_tx_bytes); + + if (tx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter, + &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop); + + if 
(rx_tables->check_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_rule_counter, + &stats->macsec_rx_pkts, &stats->macsec_rx_bytes); + + if (rx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter, + &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop); +} + +struct mlx5_macsec_stats *mlx5_macsec_fs_get_stats(struct mlx5_macsec_fs *macsec_fs) +{ + if (!macsec_fs) + return NULL; + + return &macsec_fs->stats; +} + +u32 mlx5_macsec_fs_get_fs_id_from_hashtable(struct mlx5_macsec_fs *macsec_fs, sci_t *sci) +{ + struct mlx5_fs_id *mlx5_fs_id; + u32 fs_id = 0; + + rcu_read_lock(); + mlx5_fs_id = rhashtable_lookup(&macsec_fs->sci_hash, sci, rhash_sci); + if (mlx5_fs_id) + fs_id = mlx5_fs_id->id; + rcu_read_unlock(); + + return fs_id; +} + +union mlx5_macsec_rule * +mlx5_macsec_fs_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id) +{ + struct mlx5_macsec_event_data data = {.macsec_fs = macsec_fs, + .macdev = macsec_ctx->secy->netdev, + .is_tx = + (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) + }; + union mlx5_macsec_rule *macsec_rule; + u32 tx_new_fs_id; + + macsec_rule = (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? + macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, &tx_new_fs_id) : + macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id); + + data.fs_id = (data.is_tx) ? tx_new_fs_id : *sa_fs_id; + if (macsec_rule) + blocking_notifier_call_chain(&macsec_fs->mdev->macsec_nh, + MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, + &data); + + return macsec_rule; +} + +void mlx5_macsec_fs_del_rule(struct mlx5_macsec_fs *macsec_fs, + union mlx5_macsec_rule *macsec_rule, + int action, void *macdev, u32 sa_fs_id) +{ + struct mlx5_macsec_event_data data = {.macsec_fs = macsec_fs, + .macdev = macdev, + .is_tx = (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) + }; + + data.fs_id = (data.is_tx) ? macsec_rule->tx_rule.fs_id : sa_fs_id; + blocking_notifier_call_chain(&macsec_fs->mdev->macsec_nh, + MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + &data); + + (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? 
+ macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule, macdev) : + macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule, macdev, sa_fs_id); +} + +static int mlx5_macsec_fs_add_roce_rule_rx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, u16 gid_idx, + const struct sockaddr *addr, + struct list_head *rx_rules_list) +{ + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_roce_macsec_rx_rule *rx_rule; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *new_rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + rx_rule = kzalloc(sizeof(*rx_rule), GFP_KERNEL); + if (!rx_rule) { + err = -ENOMEM; + goto out; + } + + set_ipaddr_spec(addr, spec, true); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.ft = rx_fs->roce.ft_macsec_op_check; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + new_rule = mlx5_add_flow_rules(rx_fs->roce.ft_ip_check, spec, &flow_act, + &dest, 1); + if (IS_ERR(new_rule)) { + err = PTR_ERR(new_rule); + goto ip_rule_err; + } + rx_rule->ip = new_rule; + + memset(&flow_act, 0, sizeof(flow_act)); + memset(spec, 0, sizeof(*spec)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_5); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_5, + macsec_fs_set_rx_fs_id(fs_id)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + new_rule = mlx5_add_flow_rules(rx_fs->roce.ft_macsec_op_check, spec, &flow_act, + NULL, 0); + if (IS_ERR(new_rule)) { + err = PTR_ERR(new_rule); + goto op_rule_err; + } + rx_rule->op = new_rule; + rx_rule->gid_idx = gid_idx; + rx_rule->fs_id = fs_id; + list_add_tail(&rx_rule->entry, rx_rules_list); + + goto out; + +op_rule_err: + mlx5_del_flow_rules(rx_rule->ip); + rx_rule->ip = NULL; +ip_rule_err: + kfree(rx_rule); +out: + kvfree(spec); + return err; +} + +static int mlx5_macsec_fs_add_roce_rule_tx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, u16 gid_idx, + const struct sockaddr *addr, + struct list_head *tx_rules_list) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5_roce_macsec_tx_rule *tx_rule; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *new_rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + tx_rule = kzalloc(sizeof(*tx_rule), GFP_KERNEL); + if (!tx_rule) { + err = -ENOMEM; + goto out; + } + + set_ipaddr_spec(addr, spec, false); + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_A); + MLX5_SET(set_action_in, action, data, macsec_fs_set_tx_fs_id(fs_id)); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, "Failed to alloc RoCE MACsec set modify_header_id err=%d\n", + err); + modify_hdr = NULL; + goto modify_hdr_err; + } + tx_rule->meta_modhdr = modify_hdr; + + flow_act.modify_hdr = modify_hdr; + flow_act.action = 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dest.ft = tx_fs->tables.ft_crypto.t; + new_rule = mlx5_add_flow_rules(tx_fs->ft_rdma_tx, spec, &flow_act, &dest, 1); + if (IS_ERR(new_rule)) { + err = PTR_ERR(new_rule); + mlx5_core_err(mdev, "Failed to add RoCE TX rule, err=%d\n", err); + goto rule_err; + } + tx_rule->rule = new_rule; + tx_rule->gid_idx = gid_idx; + tx_rule->fs_id = fs_id; + list_add_tail(&tx_rule->entry, tx_rules_list); + + goto out; + +rule_err: + mlx5_modify_header_dealloc(mdev, tx_rule->meta_modhdr); +modify_hdr_err: + kfree(tx_rule); +out: + kvfree(spec); + return err; +} + +void mlx5_macsec_del_roce_rule(u16 gid_idx, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, struct list_head *rx_rules_list) +{ + struct mlx5_roce_macsec_rx_rule *rx_rule, *next_rx; + struct mlx5_roce_macsec_tx_rule *tx_rule, *next_tx; + + list_for_each_entry_safe(tx_rule, next_tx, tx_rules_list, entry) { + if (tx_rule->gid_idx == gid_idx) + macsec_fs_del_roce_rule_tx(macsec_fs->mdev, tx_rule); + } + + list_for_each_entry_safe(rx_rule, next_rx, rx_rules_list, entry) { + if (rx_rule->gid_idx == gid_idx) + macsec_fs_del_roce_rule_rx(rx_rule); + } +} +EXPORT_SYMBOL_GPL(mlx5_macsec_del_roce_rule); + +int mlx5_macsec_add_roce_rule(void *macdev, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_device *iter, *macsec_device = NULL; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_fs_id *fs_id_iter; + unsigned long index = 0; + int err; + + list_for_each_entry(iter, &macsec_fs->macsec_devices_list, macsec_devices_list_entry) { + if (iter->macdev == macdev) { + macsec_device = iter; + break; + } + } + + if (!macsec_device) + return 0; + + xa_for_each(&macsec_device->tx_id_xa, index, fs_id_iter) { + err = mlx5_macsec_fs_add_roce_rule_tx(macsec_fs, fs_id_iter->id, gid_idx, addr, + tx_rules_list); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to add RoCE TX rule\n"); + goto out; + } + } + + index = 0; + xa_for_each(&macsec_device->rx_id_xa, index, fs_id_iter) { + err = mlx5_macsec_fs_add_roce_rule_rx(macsec_fs, fs_id_iter->id, gid_idx, addr, + rx_rules_list); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to add RoCE RX rule\n"); + goto out; + } + } + + return 0; +out: + mlx5_macsec_del_roce_rule(gid_idx, macsec_fs, tx_rules_list, rx_rules_list); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_macsec_add_roce_rule); + +void mlx5_macsec_add_roce_sa_rules(u32 fs_id, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs, bool is_tx) +{ + (is_tx) ? + mlx5_macsec_fs_add_roce_rule_tx(macsec_fs, fs_id, gid_idx, addr, + tx_rules_list) : + mlx5_macsec_fs_add_roce_rule_rx(macsec_fs, fs_id, gid_idx, addr, + rx_rules_list); +} +EXPORT_SYMBOL_GPL(mlx5_macsec_add_roce_sa_rules); + +void mlx5_macsec_del_roce_sa_rules(u32 fs_id, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, bool is_tx) +{ + (is_tx) ? 
+ macsec_fs_del_roce_rules_tx(macsec_fs, fs_id, tx_rules_list) : + macsec_fs_del_roce_rules_rx(macsec_fs, fs_id, rx_rules_list); +} +EXPORT_SYMBOL_GPL(mlx5_macsec_del_roce_sa_rules); + +void mlx5_macsec_fs_cleanup(struct mlx5_macsec_fs *macsec_fs) +{ + macsec_fs_rx_cleanup(macsec_fs); + macsec_fs_tx_cleanup(macsec_fs); + rhashtable_destroy(&macsec_fs->fs_id_hash); + rhashtable_destroy(&macsec_fs->sci_hash); + kfree(macsec_fs); +} + +struct mlx5_macsec_fs * +mlx5_macsec_fs_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_macsec_fs *macsec_fs; + int err; + + macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL); + if (!macsec_fs) + return NULL; + + macsec_fs->mdev = mdev; + + err = rhashtable_init(&macsec_fs->sci_hash, &rhash_sci); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n", + err); + goto err_hash; + } + + err = rhashtable_init(&macsec_fs->fs_id_hash, &rhash_fs_id); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init FS_ID hash table, err=%d\n", + err); + goto sci_hash_cleanup; + } + + err = macsec_fs_tx_init(macsec_fs); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); + goto fs_id_hash_cleanup; + } + + err = macsec_fs_rx_init(macsec_fs); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init rx_fs, err=%d\n", err); + goto tx_cleanup; + } + + BLOCKING_INIT_NOTIFIER_HEAD(&mdev->macsec_nh); + + return macsec_fs; + +tx_cleanup: + macsec_fs_tx_cleanup(macsec_fs); +fs_id_hash_cleanup: + rhashtable_destroy(&macsec_fs->fs_id_hash); +sci_hash_cleanup: + rhashtable_destroy(&macsec_fs->sci_hash); +err_hash: + kfree(macsec_fs); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h new file mode 100644 index 000000000000..34b80c3ef6a5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_MACSEC_STEERING_H__ +#define __MLX5_MACSEC_STEERING_H__ + +#ifdef CONFIG_MLX5_MACSEC + +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX +#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) + +#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16 + +struct mlx5_macsec_fs; +union mlx5_macsec_rule; + +struct mlx5_macsec_rule_attrs { + sci_t sci; + u32 macsec_obj_id; + u8 assoc_num; + int action; +}; + +struct mlx5_macsec_stats { + u64 macsec_rx_pkts; + u64 macsec_rx_bytes; + u64 macsec_rx_pkts_drop; + u64 macsec_rx_bytes_drop; + u64 macsec_tx_pkts; + u64 macsec_tx_bytes; + u64 macsec_tx_pkts_drop; + u64 macsec_tx_bytes_drop; +}; + +enum mlx5_macsec_action { + MLX5_ACCEL_MACSEC_ACTION_ENCRYPT, + MLX5_ACCEL_MACSEC_ACTION_DECRYPT, +}; + +void mlx5_macsec_fs_cleanup(struct mlx5_macsec_fs *macsec_fs); + +struct mlx5_macsec_fs * +mlx5_macsec_fs_init(struct mlx5_core_dev *mdev); + +union mlx5_macsec_rule * +mlx5_macsec_fs_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id); + +void mlx5_macsec_fs_del_rule(struct mlx5_macsec_fs *macsec_fs, + union mlx5_macsec_rule *macsec_rule, + int action, void *macdev, u32 sa_fs_id); + +void mlx5_macsec_fs_get_stats_fill(struct mlx5_macsec_fs *macsec_fs, void *macsec_stats); +struct mlx5_macsec_stats *mlx5_macsec_fs_get_stats(struct mlx5_macsec_fs *macsec_fs); +u32 mlx5_macsec_fs_get_fs_id_from_hashtable(struct mlx5_macsec_fs *macsec_fs, sci_t *sci); + +#endif + +#endif /* __MLX5_MACSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 88dbea6631d5..15561965d2af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,7 +49,6 @@ #include <linux/version.h> #include <net/devlink.h> #include "mlx5_core.h" -#include "thermal.h" #include "lib/eq.h" #include "fs_core.h" #include "lib/mpfs.h" @@ -73,6 +72,7 @@ #include "sf/dev/dev.h" #include "sf/sf.h" #include "mlx5_irq.h" +#include "hwmon.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -361,9 +361,8 @@ void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); -static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, - enum mlx5_cap_type cap_type, - enum mlx5_cap_mode cap_mode) +int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) { u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); @@ -951,10 +950,10 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) { int err; - dev->priv.devcom = mlx5_devcom_register_device(dev); - if (IS_ERR(dev->priv.devcom)) - mlx5_core_err(dev, "failed to register with devcom (0x%p)\n", - dev->priv.devcom); + dev->priv.devc = mlx5_devcom_register_device(dev); + if (IS_ERR(dev->priv.devc)) + mlx5_core_warn(dev, "failed to register devcom device %ld\n", + PTR_ERR(dev->priv.devc)); err = mlx5_query_board_id(dev); if (err) { @@ -1089,7 +1088,7 @@ err_eq_cleanup: err_irq_cleanup: mlx5_irq_table_cleanup(dev); err_devcom: - 
mlx5_devcom_unregister_device(dev->priv.devcom); + mlx5_devcom_unregister_device(dev->priv.devc); return err; } @@ -1118,7 +1117,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); mlx5_irq_table_cleanup(dev); - mlx5_devcom_unregister_device(dev->priv.devcom); + mlx5_devcom_unregister_device(dev->priv.devc); } static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout) @@ -1142,7 +1141,7 @@ static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeou return err; } - err = mlx5_cmd_init(dev); + err = mlx5_cmd_enable(dev); if (err) { mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); return err; @@ -1196,7 +1195,7 @@ stop_health_poll: mlx5_stop_health_poll(dev, boot); err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); + mlx5_cmd_disable(dev); return err; } @@ -1207,7 +1206,7 @@ static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) mlx5_core_disable_hca(dev, 0); mlx5_stop_health_poll(dev, boot); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); + mlx5_cmd_disable(dev); } static int mlx5_function_open(struct mlx5_core_dev *dev) @@ -1506,6 +1505,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); mlx5_cleanup_once(dev); goto out; } @@ -1619,21 +1619,24 @@ static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) return err; if (MLX5_CAP_GEN(dev, eth_net_offloads)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN(dev, nic_flow_table) || MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); if (err) return err; } if (MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { - err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_VDPA_EMULATION, + HCA_CAP_OPMOD_GET_CUR); if (err) return err; } @@ -1713,7 +1716,6 @@ static const int types[] = { MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, - MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, MLX5_CAP_DEBUG, MLX5_CAP_DEV_MEM, @@ -1722,7 +1724,6 @@ static const int types[] = { MLX5_CAP_VDPA_EMULATION, MLX5_CAP_IPSEC, MLX5_CAP_PORT_SELECTION, - MLX5_CAP_DEV_SHAMPO, MLX5_CAP_MACSEC, MLX5_CAP_ADV_VIRTUALIZATION, MLX5_CAP_CRYPTO, @@ -1796,6 +1797,12 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) debugfs_create_file("vhca_id", 0400, priv->dbg.dbg_root, dev, &vhca_id_fops); INIT_LIST_HEAD(&priv->traps); + err = mlx5_cmd_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing cmdif SW structs, aborting\n"); + goto err_cmd_init; + } + err = mlx5_tout_init(dev); if (err) { mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); @@ -1841,6 +1848,8 @@ err_pagealloc_init: err_health_init: mlx5_tout_cleanup(dev); err_timeout_init: + mlx5_cmd_cleanup(dev); +err_cmd_init: debugfs_remove(dev->priv.dbg.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); @@ -1863,6 +1872,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mlx5_pagealloc_cleanup(dev); 
mlx5_health_cleanup(dev); mlx5_tout_cleanup(dev); + mlx5_cmd_cleanup(dev); debugfs_remove_recursive(dev->priv.dbg.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); @@ -1920,9 +1930,9 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); - err = mlx5_thermal_init(dev); + err = mlx5_hwmon_dev_register(dev); if (err) - dev_err(&pdev->dev, "mlx5_thermal_init failed with error code %d\n", err); + mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err); pci_save_state(pdev); devlink_register(devlink); @@ -1954,7 +1964,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_drain_health_wq(dev); devlink_unregister(devlink); mlx5_sriov_disable(pdev, false); - mlx5_thermal_uninit(dev); + mlx5_hwmon_dev_unregister(dev); mlx5_crdump_disable(dev); mlx5_uninit_one(dev); mlx5_pci_close(dev); @@ -1988,7 +1998,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); - mlx5_unload_one(dev, true); + mlx5_unload_one(dev, false); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c4be257c043d..124352459c23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -174,10 +174,16 @@ static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed, #define MLX5_FLEXIBLE_INLEN(dev, fixed, item_size, num_items) \ mlx5_flexible_inlen(dev, fixed, item_size, num_items, __func__, __LINE__) +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); +int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode); int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); +int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num); int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +int mlx5_cmd_enable(struct mlx5_core_dev *dev); +void mlx5_cmd_disable(struct mlx5_core_dev *dev); void mlx5_cmd_set_state(struct mlx5_core_dev *dev, enum mlx5_cmdif_state cmdif_state); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); @@ -361,7 +367,7 @@ static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16 static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func) { - return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + return ec_vf_func ? 
vport - mlx5_core_ec_vf_vport_base(dev) + 1 : vport; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index aa403a5ea34e..1088114e905d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -29,9 +29,9 @@ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct irq_affinity_desc *af_desc, struct cpu_rmap **rmap); -int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, - struct mlx5_irq **irqs, struct cpu_rmap **rmap); -void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); +struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, + u16 vecidx, struct cpu_rmap **rmap); +void mlx5_irq_release_vector(struct mlx5_irq *irq); int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); @@ -39,17 +39,17 @@ int mlx5_irq_get_index(struct mlx5_irq *irq); struct mlx5_irq_pool; #ifdef CONFIG_MLX5_SF -int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, - struct mlx5_irq **irqs); +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx); struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc); -void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, - int num_irqs); +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq); #else -static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, - struct mlx5_irq **irqs) +static inline +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx) { - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); } static inline struct mlx5_irq * @@ -58,7 +58,9 @@ mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc * return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, - struct mlx5_irq **irqs, int num_irqs) {} +static inline +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) +{ +} #endif #endif /* __MLX5_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index cba2a4afb5fd..653648216730 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -259,8 +259,11 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, int err; irq = kzalloc(sizeof(*irq), GFP_KERNEL); - if (!irq) + if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + kfree(irq); return ERR_PTR(-ENOMEM); + } + if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) { /* The vector at index 0 is always statically allocated. If * dynamic irq is not supported all vectors are statically @@ -297,11 +300,7 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, mlx5_core_err(dev, "Failed to request irq. 
err = %d\n", err); goto err_req_irq; } - if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { - mlx5_core_warn(dev, "zalloc_cpumask_var failed\n"); - err = -ENOMEM; - goto err_cpumask; - } + if (af_desc) { cpumask_copy(irq->mask, &af_desc->mask); irq_set_affinity_and_hint(irq->map.virq, irq->mask); @@ -319,8 +318,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, err_xa: if (af_desc) irq_update_affinity_hint(irq->map.virq, NULL); - free_cpumask_var(irq->mask); -err_cpumask: free_irq(irq->map.virq, &irq->nh); err_req_irq: #ifdef CONFIG_RFS_ACCEL @@ -333,6 +330,7 @@ err_irq_rmap: if (i && pci_msix_can_alloc_dyn(dev->pdev)) pci_msix_free_irq(dev->pdev, irq->map); err_alloc_irq: + free_cpumask_var(irq->mask); kfree(irq); return ERR_PTR(err); } @@ -432,19 +430,10 @@ static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) return pool ? pool : irq_table->pcif_pool; } -/** - * mlx5_irqs_release - release one or more IRQs back to the system. - * @irqs: IRQs to be released. - * @nirqs: number of IRQs to be released. - */ -static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) +static void _mlx5_irq_release(struct mlx5_irq *irq) { - int i; - - for (i = 0; i < nirqs; i++) { - synchronize_irq(irqs[i]->map.virq); - mlx5_irq_put(irqs[i]); - } + synchronize_irq(irq->map.virq); + mlx5_irq_put(irq); } /** @@ -453,7 +442,7 @@ static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) */ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) { - mlx5_irqs_release(&ctrl_irq, 1); + _mlx5_irq_release(ctrl_irq); } /** @@ -569,53 +558,42 @@ void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map) EXPORT_SYMBOL(mlx5_msix_free); /** - * mlx5_irqs_release_vectors - release one or more IRQs back to the system. - * @irqs: IRQs to be released. - * @nirqs: number of IRQs to be released. + * mlx5_irq_release_vector - release one IRQ back to the system. + * @irq: the irq to release. */ -void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) +void mlx5_irq_release_vector(struct mlx5_irq *irq) { - mlx5_irqs_release(irqs, nirqs); + _mlx5_irq_release(irq); } /** - * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device. - * @dev: mlx5 device that is requesting the IRQs. - * @cpus: CPUs array for binding the IRQs - * @nirqs: number of IRQs to request. - * @irqs: an output array of IRQs pointers. + * mlx5_irq_request_vector - request one IRQ for mlx5 device. + * @dev: mlx5 device that is requesting the IRQ. + * @cpu: CPU to bind the IRQ to. + * @vecidx: vector index to request an IRQ for. + * @rmap: pointer to reverse map pointer for completion interrupts * * Each IRQ is bound to at most 1 CPU. - * This function is requests nirqs IRQs, starting from @vecidx. + * This function requests one IRQ for the given @vecidx. * - * This function returns the number of IRQs requested, (which might be smaller than - * @nirqs), if successful, or a negative error code in case of an error. + * This function returns a pointer to the irq on success, or an error pointer + * in case of an error. 
*/ -int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, - struct mlx5_irq **irqs, struct cpu_rmap **rmap) +struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, + u16 vecidx, struct cpu_rmap **rmap) { struct mlx5_irq_table *table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = table->pcif_pool; struct irq_affinity_desc af_desc; - struct mlx5_irq *irq; int offset = 1; - int i; if (!pool->xa_num_irqs.max) offset = 0; af_desc.is_managed = false; - for (i = 0; i < nirqs; i++) { - cpumask_clear(&af_desc.mask); - cpumask_set_cpu(cpus[i], &af_desc.mask); - irq = mlx5_irq_request(dev, i + offset, &af_desc, rmap); - if (IS_ERR(irq)) - break; - irqs[i] = irq; - } - - return i ? i : PTR_ERR(irq); + cpumask_clear(&af_desc.mask); + cpumask_set_cpu(cpu, &af_desc.mask); + return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap); } static struct mlx5_irq_pool * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 0daeb4b72cca..be70d1f23a5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -271,7 +271,7 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); -static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) { u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index 8e2abbab05f0..05e148db9889 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -129,7 +129,7 @@ static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, err = auxiliary_device_add(&sf_dev->adev); if (err) { - put_device(&sf_dev->adev.dev); + auxiliary_device_uninit(&sf_dev->adev); goto add_err; } @@ -167,7 +167,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ if (!max_functions) return 0; - base_id = MLX5_CAP_GEN(table->dev, sf_base_id); + base_id = mlx5_sf_start_function_id(table->dev); if (event->function_id < base_id || event->function_id >= (base_id + max_functions)) return 0; @@ -185,7 +185,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ mlx5_sf_dev_del(table->dev, sf_dev, sf_index); else mlx5_core_err(table->dev, - "SF DEV: teardown state for invalid dev index=%d fn_id=0x%x\n", + "SF DEV: teardown state for invalid dev index=%d sfnum=0x%x\n", sf_index, event->sw_function_id); break; case MLX5_VHCA_STATE_ACTIVE: @@ -209,7 +209,7 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) int i; max_functions = mlx5_sf_max_functions(dev); - function_id = MLX5_CAP_GEN(dev, sf_base_id); + function_id = mlx5_sf_start_function_id(dev); /* Arm the vhca context as the vhca event notifier */ for (i = 0; i < max_functions; i++) { err = mlx5_vhca_event_arm(dev, function_id); @@ -234,7 +234,7 @@ static void mlx5_sf_dev_add_active_work(struct work_struct *work) int i; max_functions = mlx5_sf_max_functions(dev); - function_id = MLX5_CAP_GEN(dev, sf_base_id); + function_id = mlx5_sf_start_function_id(dev); for (i = 0; i < max_functions; i++, function_id++) { if (table->stop_active_wq) return; @@ -299,7 +299,7 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) unsigned int 
max_sfs; int err; - if (!mlx5_sf_dev_supported(dev) || !mlx5_vhca_event_supported(dev)) + if (!mlx5_sf_dev_supported(dev)) return; table = kzalloc(sizeof(*table), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 6a3fa30b2bf2..e34a8f88c518 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -12,7 +12,7 @@ #include "diag/sf_tracepoint.h" struct mlx5_sf { - struct devlink_port dl_port; + struct mlx5_devlink_port dl_port; unsigned int port_index; u32 controller; u16 id; @@ -292,11 +292,11 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, if (IS_ERR(sf)) return PTR_ERR(sf); - err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id, - new_attr->controller, new_attr->sfnum); + err = mlx5_eswitch_load_sf_vport(esw, sf->hw_fn_id, MLX5_VPORT_UC_ADDR_CHANGE, + &sf->dl_port, new_attr->controller, new_attr->sfnum); if (err) goto esw_err; - *dl_port = &sf->dl_port; + *dl_port = &sf->dl_port.dl_port; trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum); return 0; @@ -400,7 +400,7 @@ int mlx5_devlink_sf_port_del(struct devlink *devlink, goto sf_err; } - mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); + mlx5_eswitch_unload_sf_vport(esw, sf->hw_fn_id); mlx5_sf_id_erase(table, sf); mutex_lock(&table->sf_state_lock); @@ -472,7 +472,7 @@ static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table) * arrive. It is safe to destroy all user created SFs. */ xa_for_each(&table->port_indices, index, sf) { - mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); + mlx5_eswitch_unload_sf_vport(esw, sf->hw_fn_id); mlx5_sf_id_erase(table, sf); mlx5_sf_dealloc(table, sf); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index 17aa348989cb..1f613320fe07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -9,6 +9,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "diag/sf_tracepoint.h" +#include "devlink.h" struct mlx5_sf_hw { u32 usr_sfnum; @@ -243,31 +244,61 @@ static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc) kfree(hwc->sfs); } +static void mlx5_sf_hw_table_res_unregister(struct mlx5_core_dev *dev) +{ + devl_resources_unregister(priv_to_devlink(dev)); +} + +static int mlx5_sf_hw_table_res_register(struct mlx5_core_dev *dev, u16 max_fn, + u16 max_ext_fn) +{ + struct devlink_resource_size_params size_params; + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devlink_resource_size_params_init(&size_params, max_fn, max_fn, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, "max_local_SFs", max_fn, MLX5_DL_RES_MAX_LOCAL_SFS, + DEVLINK_RESOURCE_ID_PARENT_TOP, &size_params); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, max_ext_fn, max_ext_fn, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + return devl_resource_register(devlink, "max_external_SFs", max_ext_fn, + MLX5_DL_RES_MAX_EXTERNAL_SFS, DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); +} + int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) { struct mlx5_sf_hw_table *table; u16 max_ext_fn = 0; u16 ext_base_id = 0; - u16 max_fn = 0; u16 base_id; + u16 max_fn; int err; if (!mlx5_vhca_event_supported(dev)) return 0; - if (mlx5_sf_supported(dev)) - max_fn = mlx5_sf_max_functions(dev); 
+ max_fn = mlx5_sf_max_functions(dev); err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id); if (err) return err; + if (mlx5_sf_hw_table_res_register(dev, max_fn, max_ext_fn)) + mlx5_core_dbg(dev, "failed to register max SFs resources\n"); + if (!max_fn && !max_ext_fn) return 0; table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; + if (!table) { + err = -ENOMEM; + goto alloc_err; + } mutex_init(&table->table_lock); table->dev = dev; @@ -291,6 +322,8 @@ ext_err: table_err: mutex_destroy(&table->table_lock); kfree(table); +alloc_err: + mlx5_sf_hw_table_res_unregister(dev); return err; } @@ -299,12 +332,14 @@ void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) { struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; if (!table) - return; + goto res_unregister; - mutex_destroy(&table->table_lock); mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]); mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]); + mutex_destroy(&table->table_lock); kfree(table); +res_unregister: + mlx5_sf_hw_table_res_unregister(dev); } static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 4e42a3b9b8ee..a2fc937d5461 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -285,8 +285,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) host_total_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_total_vfs); kvfree(out); - if (host_total_vfs) - return host_total_vfs; + return host_total_vfs; } done: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index e739ec6cdf90..5b83da08692d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -1422,7 +1422,6 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, case DR_ACTION_TYP_TNL_L3_TO_L2: { u8 *hw_actions; - int ret; hw_actions = kzalloc(DR_ACTION_CACHE_LINE_SIZE, GFP_KERNEL); if (!hw_actions) @@ -2079,7 +2078,7 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) && (vhca_id != dmn->info.caps.gvmi); - vport_dmn = peer_vport ? 
xa_load(&dmn->peer_dmn_xa, vhca_id) : dmn; if (!vport_dmn) { mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 7491911ebcb5..8c2a34a0d6be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -564,11 +564,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (err) - return err; + goto err_free_in; *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); - kvfree(in); +err_free_in: + kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 75dc85dc24ef..3d74109f8230 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -475,6 +475,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) mutex_init(&dmn->info.rx.mutex); mutex_init(&dmn->info.tx.mutex); xa_init(&dmn->definers_xa); + xa_init(&dmn->peer_dmn_xa); if (dr_domain_caps_init(mdev, dmn)) { mlx5dr_err(dmn, "Failed init domain, no caps\n"); @@ -507,6 +508,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) uninit_caps: dr_domain_caps_uninit(dmn); def_xa_destroy: + xa_destroy(&dmn->peer_dmn_xa); xa_destroy(&dmn->definers_xa); kfree(dmn); return NULL; @@ -547,6 +549,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) dr_domain_uninit_csum_recalc_fts(dmn); dr_domain_uninit_resources(dmn); dr_domain_caps_uninit(dmn); + xa_destroy(&dmn->peer_dmn_xa); xa_destroy(&dmn->definers_xa); mutex_destroy(&dmn->info.tx.mutex); mutex_destroy(&dmn->info.rx.mutex); @@ -556,17 +559,21 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, - u8 peer_idx) + u16 peer_vhca_id) { + struct mlx5dr_domain *peer; + mlx5dr_domain_lock(dmn); - if (dmn->peer_dmn[peer_idx]) - refcount_dec(&dmn->peer_dmn[peer_idx]->refcount); + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_dec(&peer->refcount); - dmn->peer_dmn[peer_idx] = peer_dmn; + WARN_ON(xa_err(xa_store(&dmn->peer_dmn_xa, peer_vhca_id, peer_dmn, GFP_KERNEL))); - if (dmn->peer_dmn[peer_idx]) - refcount_inc(&dmn->peer_dmn[peer_idx]->refcount); + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_inc(&peer->refcount); mlx5dr_domain_unlock(dmn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c index d6947fe13d56..8ca534ef5d03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c @@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr, u32 chunk_size; u32 index; - chunk_size = ilog2(num_of_actions); + chunk_size = ilog2(roundup_pow_of_two(num_of_actions)); /* HW modify action index granularity is at least 64B */ chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 4a5ae86e2b62..4e8527a724f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -52,6 +52,7 @@ struct dr_qp_init_attr { u32 cqn; u32 pdn; u32 max_send_wr; + u32 max_send_sge; struct mlx5_uars_page *uar; u8 isolate_vl_tc:1; }; @@ -246,6 +247,37 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) return err == CQ_POLL_ERR ? err : npolled; } +static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr) +{ + return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_flow_update_ctrl_seg) + + sizeof(struct mlx5_wqe_header_modify_argument_update_seg)); +} + +/* We calculate for specific RC QP with the required functionality */ +static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr) +{ + int update_arg_size; + int inl_size = 0; + int tot_size; + int size; + + update_arg_size = dr_qp_get_args_update_send_wqe_size(attr); + + size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) + + DR_STE_SIZE, 16); + + size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg); + + size = max(size, update_arg_size); + + tot_size = max(size, inl_size); + + return ALIGN(tot_size, MLX5_SEND_WQE_BB); +} + static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, struct dr_qp_init_attr *attr) { @@ -253,6 +285,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; struct mlx5_wq_param wqp; struct mlx5dr_qp *dr_qp; + int wqe_size; int inlen; void *qpc; void *in; @@ -332,6 +365,15 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, if (err) goto err_in; dr_qp->uar = attr->uar; + wqe_size = dr_qp_calc_rc_send_wqe(attr); + dr_qp->max_inline_data = min(wqe_size - + (sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_inline_seg)), + (2 * MLX5_SEND_WQE_BB - + (sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_inline_seg)))); return dr_qp; @@ -395,8 +437,48 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, MLX5_SEND_WQE_DS; } +static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp, + struct dr_data_seg *data_seg, void *wqe) +{ + int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_inline_seg); + struct mlx5_wqe_inline_seg *seg; + int left_space; + int inl = 0; + void *addr; + int len; + int idx; + + seg = wqe; + wqe += sizeof(*seg); + addr = (void *)(unsigned long)(data_seg->addr); + len = data_seg->length; + inl += len; + left_space = MLX5_SEND_WQE_BB - inline_header_size; + + if (likely(len > left_space)) { + memcpy(wqe, addr, left_space); + len -= left_space; + addr += left_space; + idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1); + wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); + } + + memcpy(wqe, addr, len); + + if (likely(inl)) { + seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); + return DIV_ROUND_UP(inl + sizeof(seg->byte_count), + MLX5_SEND_WQE_DS); + } else { + return 0; + } +} + static void -dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, +dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp, + struct mlx5_wqe_ctrl_seg *wq_ctrl, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, @@ -412,15 +494,17 @@ dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, wq_raddr->reserved = 0; wq_dseg = (void *)(wq_raddr + 1); + /* WQE ctrl segment + WQE remote addr segment 
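
The WQE sizing above is easy to sanity-check outside the driver. A minimal plain-C sketch, with segment sizes hard-coded from the mlx5 structure definitions (the flow-argument-update segment is left out; for max_send_sge = 1 it does not change the 64-byte-aligned result):

    #include <stdio.h>

    #define CTRL_SEG    16 /* sizeof(struct mlx5_wqe_ctrl_seg) */
    #define RADDR_SEG   16 /* sizeof(struct mlx5_wqe_raddr_seg) */
    #define DATA_SEG    16 /* sizeof(struct mlx5_wqe_data_seg) */
    #define INL_SEG      4 /* sizeof(struct mlx5_wqe_inline_seg) */
    #define SEND_WQE_BB 64 /* MLX5_SEND_WQE_BB */
    #define STE_SIZE    64 /* DR_STE_SIZE */

    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
    #define MAX(a, b) ((a) > (b) ? (a) : (b))
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        int max_send_sge = 1; /* as set later by mlx5dr_send_ring_alloc() */
        int size = CTRL_SEG + RADDR_SEG;
        int inl_size = size + ALIGN_UP(INL_SEG + STE_SIZE, 16);
        int wqe_size;

        size += max_send_sge * DATA_SEG;
        wqe_size = ALIGN_UP(MAX(size, inl_size), SEND_WQE_BB);

        /* inl_size = 32 + 80 = 112, so wqe_size = 128: two 64-byte BBs,
         * leaving 92 bytes available for inline data. */
        printf("wqe_size = %d\n", wqe_size);
        printf("max_inline_data = %d\n",
               MIN(wqe_size, 2 * SEND_WQE_BB) -
               (CTRL_SEG + RADDR_SEG + INL_SEG));
        return 0;
    }
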
*/ + *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS; - wq_dseg->byte_count = cpu_to_be32(data_seg->length); - wq_dseg->lkey = cpu_to_be32(data_seg->lkey); - wq_dseg->addr = cpu_to_be64(data_seg->addr); - - *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ - sizeof(*wq_dseg) + /* WQE data segment */ - sizeof(*wq_raddr)) / /* WQE remote addr segment */ - MLX5_SEND_WQE_DS; + if (data_seg->send_flags & IB_SEND_INLINE) { + *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg); + } else { + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS; /* WQE data segment */ + } } static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl, @@ -451,7 +535,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, switch (opcode) { case MLX5_OPCODE_RDMA_READ: case MLX5_OPCODE_RDMA_WRITE: - dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr, + dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr, rkey, data_seg, &size); break; case MLX5_OPCODE_FLOW_TBL_ACCESS: @@ -572,7 +656,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring, if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; else - send_info->write.send_flags = 0; + send_info->write.send_flags &= ~IB_SEND_SIGNALED; } static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, @@ -596,9 +680,13 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, } send_ring->pending_wqe++; + if (!send_info->write.lkey) + send_info->write.send_flags |= IB_SEND_INLINE; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; + else + send_info->write.send_flags &= ~IB_SEND_SIGNALED; send_ring->pending_wqe++; send_info->read.length = send_info->write.length; @@ -608,9 +696,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, send_info->read.lkey = send_ring->sync_mr->mkey; if (send_ring->pending_wqe % send_ring->signal_th == 0) - send_info->read.send_flags = IB_SEND_SIGNALED; + send_info->read.send_flags |= IB_SEND_SIGNALED; else - send_info->read.send_flags = 0; + send_info->read.send_flags &= ~IB_SEND_SIGNALED; } static void dr_fill_data_segs(struct mlx5dr_domain *dmn, @@ -1096,8 +1184,8 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, if (!in) goto err_cqwq; - vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); - err = mlx5_vector2eqn(mdev, vector, &eqn); + vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); + err = mlx5_comp_eqn_get(mdev, vector, &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -1257,6 +1345,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) dmn->send_ring->cq->qp = dmn->send_ring->qp; dmn->info.max_send_wr = QUEUE_SIZE; + init_attr.max_send_sge = 1; dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, DR_STE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 69d7a8f3c402..f708b029425a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -1652,17 +1652,18 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; bool 
source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); /* Find port GVMI based on the eswitch_owner_vhca_id */ if (id == dmn->info.caps.gvmi) vport_dmn = dmn; - else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] && - (id == dmn->peer_dmn[id]->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn[id]; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index f4ef0b22b991..dd856cde188d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -1984,16 +1984,17 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn); if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); /* Find port GVMI based on the eswitch_owner_vhca_id */ if (id == dmn->info.caps.gvmi) vport_dmn = dmn; - else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] && - (id == dmn->peer_dmn[id]->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn[id]; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 1622dbbe6b97..6c59de3e28f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -935,7 +935,6 @@ struct mlx5dr_domain_info { }; struct mlx5dr_domain { - struct mlx5dr_domain *peer_dmn[MLX5_MAX_PORTS]; struct mlx5_core_dev *mdev; u32 pdn; struct mlx5_uars_page *uar; @@ -956,6 +955,7 @@ struct mlx5dr_domain { struct list_head dbg_tbl_list; struct mlx5dr_dbg_dump_info dump_info; struct xarray definers_xa; + struct xarray peer_dmn_xa; /* memory management statistics */ u32 num_buddies[DR_ICM_TYPE_MAX]; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 6aac5f006bf8..14f6df88b1f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -336,7 +336,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (fte->action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { err = -EINVAL; mlx5dr_err(domain, "FW-owned reformat can't be used in SW rule\n"); - goto free_actions; + goto free_actions; } is_decap = fte->action.pkt_reformat->reformat_type == @@ -781,14 +781,14 @@ restore_fte: static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_root_namespace *peer_ns, - u8 peer_idx) + u16 peer_vhca_id) { struct mlx5dr_domain *peer_domain = NULL; if (peer_ns) peer_domain = peer_ns->fs_dr_domain.dr_domain; mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, - peer_domain, peer_idx); + peer_domain, peer_vhca_id); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 24cbb33ecd6c..89fced86936f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ 
-49,7 +49,7 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, - u8 peer_idx); + u16 peer_vhca_id); struct mlx5dr_table * mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c deleted file mode 100644 index 52199d39657e..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/device.h> -#include <linux/thermal.h> -#include <linux/err.h> -#include <linux/mlx5/driver.h> -#include "mlx5_core.h" -#include "thermal.h" - -#define MLX5_THERMAL_POLL_INT_MSEC 1000 -#define MLX5_THERMAL_NUM_TRIPS 0 -#define MLX5_THERMAL_ASIC_SENSOR_INDEX 0 - -/* Bit string indicating the writeablility of trip points if any */ -#define MLX5_THERMAL_TRIP_MASK (BIT(MLX5_THERMAL_NUM_TRIPS) - 1) - -struct mlx5_thermal { - struct mlx5_core_dev *mdev; - struct thermal_zone_device *tzdev; -}; - -static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p_temp) -{ - u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; - u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; - int err; - - MLX5_SET(mtmp_reg, mtmp_in, sensor_index, id); - - err = mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), - mtmp_out, sizeof(mtmp_out), - MLX5_REG_MTMP, 0, 0); - - if (err) - return err; - - *p_temp = MLX5_GET(mtmp_reg, mtmp_out, temperature); - - return 0; -} - -static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev, - int *p_temp) -{ - struct mlx5_thermal *thermal = thermal_zone_device_priv(tzdev); - struct mlx5_core_dev *mdev = thermal->mdev; - int err; - - err = mlx5_thermal_get_mtmp_temp(mdev, MLX5_THERMAL_ASIC_SENSOR_INDEX, p_temp); - - if (err) - return err; - - /* The unit of temp returned is in 0.125 C. The thermal - * framework expects the value in 0.001 C. 
- */ - *p_temp *= 125; - - return 0; -} - -static struct thermal_zone_device_ops mlx5_thermal_ops = { - .get_temp = mlx5_thermal_get_temp, -}; - -int mlx5_thermal_init(struct mlx5_core_dev *mdev) -{ - char data[THERMAL_NAME_LENGTH]; - struct mlx5_thermal *thermal; - int err; - - if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev)) - return 0; - - err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device)); - if (err < 0 || err >= sizeof(data)) { - mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err); - return -EINVAL; - } - - thermal = kzalloc(sizeof(*thermal), GFP_KERNEL); - if (!thermal) - return -ENOMEM; - - thermal->mdev = mdev; - thermal->tzdev = thermal_zone_device_register_with_trips(data, - NULL, - MLX5_THERMAL_NUM_TRIPS, - MLX5_THERMAL_TRIP_MASK, - thermal, - &mlx5_thermal_ops, - NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); - if (IS_ERR(thermal->tzdev)) { - err = PTR_ERR(thermal->tzdev); - mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err); - kfree(thermal); - return err; - } - - mdev->thermal = thermal; - return 0; -} - -void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) -{ - if (!mdev->thermal) - return; - - thermal_zone_device_unregister(mdev->thermal->tzdev); - kfree(mdev->thermal); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.h b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h deleted file mode 100644 index 7d752c122192..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB - * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. - */ -#ifndef __MLX5_THERMAL_DRIVER_H -#define __MLX5_THERMAL_DRIVER_H - -#if IS_ENABLED(CONFIG_THERMAL) -int mlx5_thermal_init(struct mlx5_core_dev *mdev); -void mlx5_thermal_uninit(struct mlx5_core_dev *mdev); -#else -static inline int mlx5_thermal_init(struct mlx5_core_dev *mdev) -{ - mdev->thermal = NULL; - return 0; -} - -static inline void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) { } -#endif - -#endif /* __MLX5_THERMAL_DRIVER_H */ diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index a453b9cd9033..bc94e75a7aeb 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -175,9 +175,6 @@ enum mlxbf_gige_res { int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv); void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); -irqreturn_t mlxbf_gige_mdio_handle_phy_interrupt(int irq, void *dev_id); -void mlxbf_gige_mdio_enable_phy_int(struct mlxbf_gige *priv); - void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 dmac); void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 3ca9fce759ea..71cad6bb6e62 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -29,7 +29,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_nve.o spectrum_nve_vxlan.o \ spectrum_dpipe.o spectrum_trap.o \ spectrum_ethtool.o spectrum_policer.o \ - spectrum_pgt.o + spectrum_pgt.o spectrum_port_range.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 9dfe7148199f..faa63ea9b83e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -1887,6 +1887,46 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid, } EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set); +/* Ignore Action + * ------------- + * The ignore action is used to ignore basic switching functions such as + * learning on a per-packet basis. + */ + +#define MLXSW_AFA_IGNORE_CODE 0x0F +#define MLXSW_AFA_IGNORE_SIZE 1 + +/* afa_ignore_disable_learning + * Disable learning on ingress. + */ +MLXSW_ITEM32(afa, ignore, disable_learning, 0x00, 29, 1); + +/* afa_ignore_disable_security + * Disable security lookup on ingress. + * Reserved when Spectrum-1. + */ +MLXSW_ITEM32(afa, ignore, disable_security, 0x00, 28, 1); + +static void mlxsw_afa_ignore_pack(char *payload, bool disable_learning, + bool disable_security) +{ + mlxsw_afa_ignore_disable_learning_set(payload, disable_learning); + mlxsw_afa_ignore_disable_security_set(payload, disable_security); +} + +int mlxsw_afa_block_append_ignore(struct mlxsw_afa_block *block, + bool disable_learning, bool disable_security) +{ + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_IGNORE_CODE, + MLXSW_AFA_IGNORE_SIZE); + + if (IS_ERR(act)) + return PTR_ERR(act); + mlxsw_afa_ignore_pack(act, disable_learning, disable_security); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_ignore); + /* MC Routing Action * ----------------- * The Multicast router action. Can be used by RMFT_V2 - Router Multicast diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index db58037be46e..0ead3a212de8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -89,6 +89,8 @@ int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, struct netlink_ext_ack *extack); int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_ignore(struct mlxsw_afa_block *block, + bool disable_learning, bool disable_security); int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, u16 expected_irif, u16 min_mtu, bool rmid_valid, u32 kvdl_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index f0b2963ebac3..70f9b5e85a26 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), @@ -43,6 +43,7 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 
0x38, 4), MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), MLXSW_AFK_ELEMENT_INFO_U32(FDB_MISS, 0x40, 0, 1), + MLXSW_AFK_ELEMENT_INFO_U32(L4_PORT_RANGE, 0x40, 1, 16), }; struct mlxsw_afk { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 65a4abadc7db..2eac7582c31a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -36,6 +36,7 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB, MLXSW_AFK_ELEMENT_FDB_MISS, + MLXSW_AFK_ELEMENT_L4_PORT_RANGE, MLXSW_AFK_ELEMENT_MAX, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 0107cbc32fc7..d637c0348fa1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -32,6 +32,7 @@ struct mlxsw_env { const struct mlxsw_bus_info *bus_info; u8 max_module_count; /* Maximum number of modules per-slot. */ u8 num_of_slots; /* Including the main board. */ + u8 max_eeprom_len; /* Maximum module EEPROM transaction length. */ struct mutex line_cards_lock; /* Protects line cards. */ struct mlxsw_env_line_card *line_cards[]; }; @@ -111,7 +112,7 @@ mlxsw_env_validate_cable_ident(struct mlxsw_core *core, u8 slot_index, int id, if (err) return err; - mlxsw_reg_mcia_pack(mcia_pl, slot_index, id, 0, + mlxsw_reg_mcia_pack(mcia_pl, slot_index, id, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, MLXSW_REG_MCIA_I2C_ADDR_LOW); err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); @@ -146,6 +147,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, u8 slot_index, int module, u16 offset, u16 size, void *data, bool qsfp, unsigned int *p_read_size) { + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); char mcia_pl[MLXSW_REG_MCIA_LEN]; char *eeprom_tmp; u16 i2c_addr; @@ -153,11 +155,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, u8 slot_index, int status; int err; - /* MCIA register accepts buffer size <= 48. Page of size 128 should be - * read by chunks of size 48, 48, 32. Align the size of the last chunk - * to avoid reading after the end of the page. 
- */ - size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE); + size = min_t(u16, size, mlxsw_env->max_eeprom_len); if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH && offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) @@ -188,7 +186,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, u8 slot_index, } } - mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page, offset, size, + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page, offset, size, i2c_addr); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl); @@ -266,12 +264,12 @@ mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, u8 slot_index, page = MLXSW_REG_MCIA_TH_PAGE_CMIS_NUM; else page = MLXSW_REG_MCIA_TH_PAGE_NUM; - mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page, + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page, MLXSW_REG_MCIA_TH_PAGE_OFF + off, MLXSW_REG_MCIA_TH_ITEM_SIZE, MLXSW_REG_MCIA_I2C_ADDR_LOW); } else { - mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, MLXSW_REG_MCIA_PAGE0_LO, off, MLXSW_REG_MCIA_TH_ITEM_SIZE, MLXSW_REG_MCIA_I2C_ADDR_HIGH); @@ -489,9 +487,9 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 size; size = min_t(u8, page->length - bytes_read, - MLXSW_REG_MCIA_EEPROM_SIZE); + mlxsw_env->max_eeprom_len); - mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, 0, page->page, + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page->page, device_addr + bytes_read, size, page->i2c_address); mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank); @@ -1359,6 +1357,26 @@ static struct mlxsw_linecards_event_ops mlxsw_env_event_ops = { .got_inactive = mlxsw_env_got_inactive, }; +static int mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env) +{ + char mcam_pl[MLXSW_REG_MCAM_LEN]; + bool mcia_128b_supported; + int err; + + mlxsw_reg_mcam_pack(mcam_pl, + MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); + err = mlxsw_reg_query(mlxsw_env->core, MLXSW_REG(mcam), mcam_pl); + if (err) + return err; + + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B, + &mcia_128b_supported); + + mlxsw_env->max_eeprom_len = mcia_128b_supported ? 
128 : 48; + + return 0; +} + int mlxsw_env_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *bus_info, struct mlxsw_env **p_env) @@ -1427,10 +1445,15 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, if (err) goto err_type_set; + err = mlxsw_env_max_module_eeprom_len_query(env); + if (err) + goto err_eeprom_len_query; + env->line_cards[0]->active = true; return 0; +err_eeprom_len_query: err_type_set: mlxsw_env_module_event_disable(env, 0); err_mlxsw_env_module_event_enable: diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 70735068cf29..0fd290d776ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -405,7 +405,8 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); return sprintf(buf, "front panel %03u\n", - mlxsw_hwmon_attr->type_index); + mlxsw_hwmon_attr->type_index + 1 - + mlxsw_hwmon_attr->mlxsw_hwmon_dev->sensor_count); } static ssize_t diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 41298835a11e..d23f293e285c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -48,6 +48,7 @@ #define MLXSW_I2C_MBOX_SIZE_BITS 12 #define MLXSW_I2C_ADDR_BUF_SIZE 4 #define MLXSW_I2C_BLK_DEF 32 +#define MLXSW_I2C_BLK_MAX 100 #define MLXSW_I2C_RETRY 5 #define MLXSW_I2C_TIMEOUT_MSECS 5000 #define MLXSW_I2C_MAX_DATA_SIZE 256 @@ -444,7 +445,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, } else { /* No input mailbox is case of initialization query command. */ reg_size = MLXSW_I2C_MAX_DATA_SIZE; - num = reg_size / mlxsw_i2c->block_size; + num = DIV_ROUND_UP(reg_size, mlxsw_i2c->block_size); if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { dev_err(&client->dev, "Could not acquire lock"); @@ -653,7 +654,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client) return -EOPNOTSUPP; } - mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF, + mlxsw_i2c->block_size = min_t(u16, MLXSW_I2C_BLK_MAX, min_t(u16, quirks->max_read_len, quirks->max_write_len)); } else { diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c968309657dd..51eea1f0529c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -517,11 +517,15 @@ static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci, struct sk_buff *skb, enum mlxsw_pci_cqe_v cqe_v, char *cqe) { + u8 ts_type; + if (cqe_v != MLXSW_PCI_CQE_V2) return; - if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) != - MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC) + ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe); + + if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC && + ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC) return; mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 8165bf31a99a..ae556ddd7624 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -97,14 +97,6 @@ MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1); */ MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12); -/* reg_sspr_sub_port - * Virtual port within the physical port. - * Should be set to 0 when virtual ports are not enabled on the port. 
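
Two independent fixes meet in the i2c.c hunk above: the block size is now clamped with min_t() against MLXSW_I2C_BLK_MAX instead of being raised by max_t(), and the transfer count uses DIV_ROUND_UP() so the tail of the register is not silently dropped. The arithmetic of the second fix, as a runnable sketch:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        int reg_size = 256;   /* MLXSW_I2C_MAX_DATA_SIZE */
        int block_size = 100; /* MLXSW_I2C_BLK_MAX */

        /* Truncating division reads only 2 * 100 = 200 of 256 bytes. */
        printf("truncated:  %d transfers\n", reg_size / block_size);
        /* Rounding up issues a third, shorter transfer for the tail. */
        printf("rounded up: %d transfers\n",
               DIV_ROUND_UP(reg_size, block_size));
        return 0;
    }
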
- *
- * Access: RW
- */
-MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8);
-
 /* reg_sspr_system_port
  * Unique identifier within the stacking domain that represents all the ports
  * that are available in the system (external ports).
@@ -120,7 +112,6 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port)
 	MLXSW_REG_ZERO(sspr, payload);
 	mlxsw_reg_sspr_m_set(payload, 1);
 	mlxsw_reg_sspr_local_port_set(payload, local_port);
-	mlxsw_reg_sspr_sub_port_set(payload, 0);
 	mlxsw_reg_sspr_system_port_set(payload, local_port);
 }
 
@@ -2799,6 +2790,78 @@ static inline void mlxsw_reg_ptar_unpack(char *payload, char *tcam_region_info)
 	mlxsw_reg_ptar_tcam_region_info_memcpy_from(payload, tcam_region_info);
 }
 
+/* PPRR - Policy-Engine Port Range Register
+ * ----------------------------------------
+ * This register is used for configuring port range identification.
+ */
+#define MLXSW_REG_PPRR_ID 0x3008
+#define MLXSW_REG_PPRR_LEN 0x14
+
+MLXSW_REG_DEFINE(pprr, MLXSW_REG_PPRR_ID, MLXSW_REG_PPRR_LEN);
+
+/* reg_pprr_ipv4
+ * Apply port range register to IPv4 packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, ipv4, 0x00, 31, 1);
+
+/* reg_pprr_ipv6
+ * Apply port range register to IPv6 packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, ipv6, 0x00, 30, 1);
+
+/* reg_pprr_src
+ * Apply port range register to source L4 ports.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, src, 0x00, 29, 1);
+
+/* reg_pprr_dst
+ * Apply port range register to destination L4 ports.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, dst, 0x00, 28, 1);
+
+/* reg_pprr_tcp
+ * Apply port range register to TCP packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, tcp, 0x00, 27, 1);
+
+/* reg_pprr_udp
+ * Apply port range register to UDP packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, udp, 0x00, 26, 1);
+
+/* reg_pprr_register_index
+ * Index of Port Range Register being accessed.
+ * Range is 0..cap_max_acl_l4_port_range-1.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pprr, register_index, 0x00, 0, 8);
+
+/* reg_pprr_port_range_min
+ * Minimum port range for comparison.
+ * Match is defined as:
+ * port_range_min <= packet_port <= port_range_max.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, port_range_min, 0x04, 16, 16);
+
+/* reg_pprr_port_range_max
+ * Maximum port range for comparison.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pprr, port_range_max, 0x04, 0, 16);
+
+static inline void mlxsw_reg_pprr_pack(char *payload, u8 register_index)
+{
+	MLXSW_REG_ZERO(pprr, payload);
+	mlxsw_reg_pprr_register_index_set(payload, register_index);
+}
+
 /* PPBS - Policy-Engine Policy Based Switching Register
  * ----------------------------------------------------
  * This register retrieves and sets Policy Based Switching Table entries.
@@ -9568,18 +9631,10 @@ static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind,
  */
 
 #define MLXSW_REG_MCIA_ID 0x9014
-#define MLXSW_REG_MCIA_LEN 0x40
+#define MLXSW_REG_MCIA_LEN 0x94
 
 MLXSW_REG_DEFINE(mcia, MLXSW_REG_MCIA_ID, MLXSW_REG_MCIA_LEN);
 
-/* reg_mcia_l
- * Lock bit. Setting this bit will lock the access to the specific
- * cable. Used for updating a full page in a cable EPROM. Any access
- * other then subsequence writes will fail while the port is locked.
- * Access: RW
- */
-MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1);
-
 /* reg_mcia_module
  * Module number.
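
A packet's L4 port matches a PPRR entry when port_range_min <= packet_port <= port_range_max and the protocol/direction bits apply. A small software model of one entry (field names mirror the register; this is an illustration, not driver code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct pprr_entry {
        bool ipv4, ipv6; /* apply to IPv4/IPv6 packets */
        bool src, dst;   /* compare source/destination port */
        bool tcp, udp;   /* apply to TCP/UDP packets */
        uint16_t port_range_min;
        uint16_t port_range_max;
    };

    static bool pprr_port_matches(const struct pprr_entry *e, uint16_t port)
    {
        return port >= e->port_range_min && port <= e->port_range_max;
    }

    int main(void)
    {
        struct pprr_entry ephemeral = {
            .ipv4 = true, .ipv6 = true, .src = true,
            .tcp = true, .udp = true,
            .port_range_min = 32768, .port_range_max = 60999,
        };

        printf("port 40000 matches: %d\n",
               pprr_port_matches(&ephemeral, 40000));
        return 0;
    }
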
  * Access: Index
@@ -9644,7 +9699,6 @@ MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
 
 #define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH 256
 #define MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH 128
-#define MLXSW_REG_MCIA_EEPROM_SIZE 48
 #define MLXSW_REG_MCIA_I2C_ADDR_LOW 0x50
 #define MLXSW_REG_MCIA_I2C_ADDR_HIGH 0x51
 #define MLXSW_REG_MCIA_PAGE0_LO_OFF 0xa0
@@ -9681,7 +9735,7 @@ enum mlxsw_reg_mcia_eeprom_module_info {
  * Bytes to read/write.
  * Access: RW
  */
-MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
+MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, 128);
 
 /* This is used to access the optional upper pages (1-3) in the QSFP+
  * memory map. Page 1 is available on offset 256 through 383, page 2 -
@@ -9692,14 +9746,12 @@ MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
 	MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1)
 
 static inline void mlxsw_reg_mcia_pack(char *payload, u8 slot_index, u8 module,
-				       u8 lock, u8 page_number,
-				       u16 device_addr, u8 size,
+				       u8 page_number, u16 device_addr, u8 size,
 				       u8 i2c_device_addr)
 {
 	MLXSW_REG_ZERO(mcia, payload);
 	mlxsw_reg_mcia_slot_set(payload, slot_index);
 	mlxsw_reg_mcia_module_set(payload, module);
-	mlxsw_reg_mcia_l_set(payload, lock);
 	mlxsw_reg_mcia_page_number_set(payload, page_number);
 	mlxsw_reg_mcia_device_address_set(payload, device_addr);
 	mlxsw_reg_mcia_size_set(payload, size);
@@ -10509,6 +10561,79 @@ static inline void mlxsw_reg_mcda_pack(char *payload, u32 update_handle,
 		mlxsw_reg_mcda_data_set(payload, i, *(u32 *) &data[i * 4]);
 }
 
+/* MCAM - Management Capabilities Mask Register
+ * --------------------------------------------
+ * Reports the device supported management features.
+ */
+#define MLXSW_REG_MCAM_ID 0x907F
+#define MLXSW_REG_MCAM_LEN 0x48
+
+MLXSW_REG_DEFINE(mcam, MLXSW_REG_MCAM_ID, MLXSW_REG_MCAM_LEN);
+
+enum mlxsw_reg_mcam_feature_group {
+	/* Enhanced features. */
+	MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES,
+};
+
+/* reg_mcam_feature_group
+ * Feature list mask index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8);
+
+enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
+	/* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */
+	MLXSW_REG_MCAM_MCIA_128B = 34,
+};
+
+#define MLXSW_REG_BYTES_PER_DWORD 0x4
+
+/* reg_mcam_mng_feature_cap_mask
+ * Supported port's enhanced features.
+ * Based on feature_group index.
+ * When bit is set, the feature is supported in the device.
+ * Access: RO
+ */
+#define MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(_dw_num, _offset)	\
+	MLXSW_ITEM_BIT_ARRAY(reg, mcam, mng_feature_cap_mask_dw##_dw_num, \
+			     _offset, MLXSW_REG_BYTES_PER_DWORD, 1)
+
+/* The access to the bits in the field 'mng_feature_cap_mask' is not the same
+ * as for other mask fields in other registers. In most cases bit #0 is the
+ * first one in the last dword. In the MCAM register, the first dword contains
+ * bits #0-#31 and so on, so the access to the bits is simpler using a bit
+ * array per dword. Declare each dword of the 'mng_feature_cap_mask' field
+ * separately.
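
Given the per-dword layout described above, locating a capability bit is exactly the divide/modulo that mlxsw_reg_mcam_unpack() below performs; e.g. MLXSW_REG_MCAM_MCIA_128B (bit 34) lands in dword 1 at offset 2:

    #include <stdio.h>

    int main(void)
    {
        int bit = 34;               /* MLXSW_REG_MCAM_MCIA_128B */
        int bits_per_dword = 4 * 8; /* MLXSW_REG_BYTES_PER_DWORD * BITS_PER_BYTE */

        printf("dword %d, offset %d\n",
               bit / bits_per_dword, bit % bits_per_dword);
        return 0;
    }
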
+ */ +MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(0, 0x28); +MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(1, 0x2C); +MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(2, 0x30); +MLXSW_REG_MCAM_MNG_FEATURE_CAP_MASK_DWORD(3, 0x34); + +static inline void +mlxsw_reg_mcam_pack(char *payload, enum mlxsw_reg_mcam_feature_group feat_group) +{ + MLXSW_REG_ZERO(mcam, payload); + mlxsw_reg_mcam_feature_group_set(payload, feat_group); +} + +static inline void +mlxsw_reg_mcam_unpack(char *payload, + enum mlxsw_reg_mcam_mng_feature_cap_mask_bits bit, + bool *p_mng_feature_cap_val) +{ + int offset = bit % (MLXSW_REG_BYTES_PER_DWORD * BITS_PER_BYTE); + int dword = bit / (MLXSW_REG_BYTES_PER_DWORD * BITS_PER_BYTE); + u8 (*getters[])(const char *, u16) = { + mlxsw_reg_mcam_mng_feature_cap_mask_dw0_get, + mlxsw_reg_mcam_mng_feature_cap_mask_dw1_get, + mlxsw_reg_mcam_mng_feature_cap_mask_dw2_get, + mlxsw_reg_mcam_mng_feature_cap_mask_dw3_get, + }; + + if (!WARN_ON_ONCE(dword >= ARRAY_SIZE(getters))) + *p_mng_feature_cap_val = getters[dword](payload, offset); +} + /* MPSC - Monitoring Packet Sampling Configuration Register * -------------------------------------------------------- * MPSC Register is used to configure the Packet Sampling mechanism. @@ -12819,6 +12944,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(pacl), MLXSW_REG(pagt), MLXSW_REG(ptar), + MLXSW_REG(pprr), MLXSW_REG(ppbs), MLXSW_REG(prcr), MLXSW_REG(pefa), @@ -12901,10 +13027,11 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mcion), MLXSW_REG(mtpps), MLXSW_REG(mtutc), - MLXSW_REG(mpsc), MLXSW_REG(mcqi), MLXSW_REG(mcc), MLXSW_REG(mcda), + MLXSW_REG(mcam), + MLXSW_REG(mpsc), MLXSW_REG(mgpc), MLXSW_REG(mprs), MLXSW_REG(mogcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 19ae0d1c74a8..89dd2777ec4d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -39,6 +39,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_ACL_FLEX_KEYS, MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE, MLXSW_RES_ID_ACL_ACTIONS_PER_SET, + MLXSW_RES_ID_ACL_MAX_L4_PORT_RANGE, MLXSW_RES_ID_ACL_MAX_ERPT_BANKS, MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE, MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID, @@ -99,6 +100,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910, [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911, [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912, + [MLXSW_RES_ID_ACL_MAX_L4_PORT_RANGE] = 0x2920, [MLXSW_RES_ID_ACL_MAX_ERPT_BANKS] = 0x2940, [MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE] = 0x2941, [MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID] = 0x2942, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 25a01dafde1b..9dbd5edff0b0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1132,8 +1132,8 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev, return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid)); } -static int mlxsw_sp_port_kill_vid(struct net_device *dev, - __be16 __always_unused proto, u16 vid) +int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; @@ -3188,6 +3188,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_nve_init; } + err = mlxsw_sp_port_range_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to 
initialize port ranges\n"); + goto err_port_range_init; + } + err = mlxsw_sp_acl_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n"); @@ -3280,6 +3286,8 @@ err_ptp_clock_init: err_router_init: mlxsw_sp_acl_fini(mlxsw_sp); err_acl_init: + mlxsw_sp_port_range_fini(mlxsw_sp); +err_port_range_init: mlxsw_sp_nve_fini(mlxsw_sp); err_nve_init: mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp); @@ -3462,6 +3470,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) } mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); + mlxsw_sp_port_range_fini(mlxsw_sp); mlxsw_sp_nve_fini(mlxsw_sp); mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); @@ -3730,6 +3739,26 @@ static int mlxsw_sp_resources_rifs_register(struct mlxsw_core *mlxsw_core) &size_params); } +static int +mlxsw_sp_resources_port_range_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params size_params; + u64 max; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, ACL_MAX_L4_PORT_RANGE)) + return -EIO; + + max = MLXSW_CORE_RES_GET(mlxsw_core, ACL_MAX_L4_PORT_RANGE); + devlink_resource_size_params_init(&size_params, max, max, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devl_resource_register(devlink, "port_range_registers", max, + MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); +} + static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) { int err; @@ -3758,8 +3787,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_rifs_register; + err = mlxsw_sp_resources_port_range_register(mlxsw_core); + if (err) + goto err_resources_port_range_register; + return 0; +err_resources_port_range_register: err_resources_rifs_register: err_resources_rif_mac_profile_register: err_policer_resources_register: @@ -3797,8 +3831,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_rifs_register; + err = mlxsw_sp_resources_port_range_register(mlxsw_core); + if (err) + goto err_resources_port_range_register; + return 0; +err_resources_port_range_register: err_resources_rifs_register: err_resources_rif_mac_profile_register: err_policer_resources_register: @@ -4073,23 +4112,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) return (struct mlxsw_sp_port *)priv.data; } -struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) -{ - struct mlxsw_sp_port *mlxsw_sp_port; - - rcu_read_lock(); - mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); - if (mlxsw_sp_port) - dev_hold(mlxsw_sp_port->dev); - rcu_read_unlock(); - return mlxsw_sp_port; -} - -void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) -{ - dev_put(mlxsw_sp_port->dev); -} - int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp) { char mprs_pl[MLXSW_REG_MPRS_LEN]; @@ -4298,6 +4320,88 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, return -EBUSY; } +static int mlxsw_sp_lag_uppers_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *upper_dev; + struct net_device *master; + struct list_head *iter; + int done = 0; + int err; + + master = netdev_master_upper_dev_get(lag_dev); + if (master && netif_is_bridge_master(master)) { + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, lag_dev, master, + extack); + if (err) + return err; + } + + 
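
mlxsw_sp_lag_uppers_bridge_join() below joins a variable number of VLAN uppers and must unwind exactly the ones that succeeded, which is what the done counter is for. The same idiom in a generic, runnable form (apply()/undo() are placeholders, not driver functions):

    #include <stdio.h>

    static int apply(int item) { return item == 3 ? -1 : 0; } /* fails on 3 */
    static void undo(int item) { printf("undo %d\n", item); }

    static int apply_all(const int *items, int n)
    {
        int done = 0;
        int i, err;

        for (i = 0; i < n; i++) {
            err = apply(items[i]);
            if (err)
                goto err_rollback;
            done++;
        }
        return 0;

    err_rollback:
        /* Walk the same order again, undoing only the 'done' successes. */
        for (i = 0; i < n && done > 0; i++, done--)
            undo(items[i]);
        return err;
    }

    int main(void)
    {
        int items[] = { 1, 2, 3, 4 };

        return apply_all(items, 4) ? 1 : 0;
    }
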
netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + + master = netdev_master_upper_dev_get(upper_dev); + if (master && netif_is_bridge_master(master)) { + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, + upper_dev, master, + extack); + if (err) + goto err_port_bridge_join; + } + + ++done; + } + + return 0; + +err_port_bridge_join: + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + + master = netdev_master_upper_dev_get(upper_dev); + if (!master || !netif_is_bridge_master(master)) + continue; + + if (!done--) + break; + + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, master); + } + + master = netdev_master_upper_dev_get(lag_dev); + if (master && netif_is_bridge_master(master)) + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, master); + + return err; +} + +static void +mlxsw_sp_lag_uppers_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct net_device *upper_dev; + struct net_device *master; + struct list_head *iter; + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + + master = netdev_master_upper_dev_get(upper_dev); + if (!master) + continue; + + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, master); + } + + master = netdev_master_upper_dev_get(lag_dev); + if (master) + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, master); +} + static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev, struct netlink_ext_ack *extack) @@ -4322,6 +4426,12 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index); if (err) return err; + + err = mlxsw_sp_lag_uppers_bridge_join(mlxsw_sp_port, lag_dev, + extack); + if (err) + goto err_lag_uppers_bridge_join; + err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); if (err) goto err_col_port_add; @@ -4342,8 +4452,14 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, if (err) goto err_router_join; + err = mlxsw_sp_netdevice_enslavement_replay(mlxsw_sp, lag_dev, extack); + if (err) + goto err_replay; + return 0; +err_replay: + mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev); err_router_join: lag->ref_count--; mlxsw_sp_port->lagged = 0; @@ -4351,6 +4467,8 @@ err_router_join: mlxsw_sp_port->local_port); mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: + mlxsw_sp_lag_uppers_bridge_leave(mlxsw_sp_port, lag_dev); +err_lag_uppers_bridge_join: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); return err; @@ -4600,9 +4718,62 @@ static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, return true; } +static bool mlxsw_sp_netdev_is_master(struct net_device *upper_dev, + struct net_device *dev) +{ + return upper_dev == netdev_master_upper_dev_get(dev); +} + +static int __mlxsw_sp_netdevice_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, void *ptr, + bool process_foreign); + +static int mlxsw_sp_netdevice_validate_uppers(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct net_device *upper_dev; + struct list_head *iter; + int err; + + netdev_for_each_upper_dev_rcu(dev, upper_dev, iter) { + struct netdev_notifier_changeupper_info info = { + .info = { + .dev = dev, + .extack = extack, + }, + .master = mlxsw_sp_netdev_is_master(upper_dev, dev), + .upper_dev = upper_dev, + .linking = true, + + /* 
upper_info is relevant for LAG devices. But we would + * only need this if LAG were a valid upper above + * another upper (e.g. a bridge that is a member of a + * LAG), and that is never a valid configuration. So we + * can keep this as NULL. + */ + .upper_info = NULL, + }; + + err = __mlxsw_sp_netdevice_event(mlxsw_sp, + NETDEV_PRECHANGEUPPER, + &info, true); + if (err) + return err; + + err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp, upper_dev, + extack); + if (err) + return err; + } + + return 0; +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, struct net_device *dev, - unsigned long event, void *ptr) + unsigned long event, void *ptr, + bool replay_deslavement) { struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; @@ -4640,8 +4811,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev))) { - NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); - return -EINVAL; + err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp, + upper_dev, + extack); + if (err) + return err; } if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, @@ -4656,11 +4830,6 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port"); return -EINVAL; } - if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_exists(mlxsw_sp, lower_dev)) { - NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); - return -EOPNOTSUPP; - } if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN"); return -EINVAL; @@ -4707,15 +4876,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; if (netif_is_bridge_master(upper_dev)) { - if (info->linking) + if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, lower_dev, upper_dev, extack); - else + } else { mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lower_dev, upper_dev); + if (!replay_deslavement) + break; + mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp, + lower_dev); + } } else if (netif_is_lag_master(upper_dev)) { if (info->linking) { err = mlxsw_sp_port_lag_join(mlxsw_sp_port, @@ -4724,6 +4898,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); + mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp, + dev); } } else if (netif_is_ovs_master(upper_dev)) { if (info->linking) @@ -4776,13 +4952,15 @@ static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev, struct net_device *port_dev, - unsigned long event, void *ptr) + unsigned long event, void *ptr, + bool replay_deslavement) { switch (event) { case NETDEV_PRECHANGEUPPER: case NETDEV_CHANGEUPPER: return mlxsw_sp_netdevice_port_upper_event(lower_dev, port_dev, - event, ptr); + event, ptr, + replay_deslavement); case NETDEV_CHANGELOWERSTATE: return mlxsw_sp_netdevice_port_lower_event(port_dev, event, ptr); @@ -4791,6 +4969,30 @@ static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev, return 0; } +/* Called for LAG or its upper VLAN after the per-LAG-lower processing 
was done, + * to do any per-LAG / per-LAG-upper processing. + */ +static int mlxsw_sp_netdevice_post_lag_event(struct net_device *dev, + unsigned long event, + void *ptr) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(dev); + struct netdev_notifier_changeupper_info *info = ptr; + + if (!mlxsw_sp) + return 0; + + switch (event) { + case NETDEV_CHANGEUPPER: + if (info->linking) + break; + if (netif_is_bridge_master(info->upper_dev)) + mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp, dev); + break; + } + return 0; +} + static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, unsigned long event, void *ptr) { @@ -4801,19 +5003,19 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, netdev_for_each_lower_dev(lag_dev, dev, iter) { if (mlxsw_sp_port_dev_check(dev)) { ret = mlxsw_sp_netdevice_port_event(lag_dev, dev, event, - ptr); + ptr, false); if (ret) return ret; } } - return 0; + return mlxsw_sp_netdevice_post_lag_event(lag_dev, event, ptr); } static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, struct net_device *dev, unsigned long event, void *ptr, - u16 vid) + u16 vid, bool replay_deslavement) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -4844,27 +5046,30 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev))) { - NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); - return -EINVAL; - } - if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { - NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); - return -EOPNOTSUPP; + err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp, + upper_dev, + extack); + if (err) + return err; } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; if (netif_is_bridge_master(upper_dev)) { - if (info->linking) + if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, vlan_dev, upper_dev, extack); - else + } else { mlxsw_sp_port_bridge_leave(mlxsw_sp_port, vlan_dev, upper_dev); + if (!replay_deslavement) + break; + mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp, + vlan_dev); + } } else if (netif_is_macvlan(upper_dev)) { if (!info->linking) mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); @@ -4888,26 +5093,26 @@ static int mlxsw_sp_netdevice_lag_port_vlan_event(struct net_device *vlan_dev, if (mlxsw_sp_port_dev_check(dev)) { ret = mlxsw_sp_netdevice_port_vlan_event(vlan_dev, dev, event, ptr, - vid); + vid, false); if (ret) return ret; } } - return 0; + return mlxsw_sp_netdevice_post_lag_event(vlan_dev, event, ptr); } -static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, +static int mlxsw_sp_netdevice_bridge_vlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev, struct net_device *br_dev, unsigned long event, void *ptr, - u16 vid) + u16 vid, bool process_foreign) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; struct net_device *upper_dev; - if (!mlxsw_sp) + if (!process_foreign && !mlxsw_sp_lower_get(vlan_dev)) return 0; extack = netdev_notifier_info_to_extack(&info->info); @@ -4920,13 +5125,6 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EOPNOTSUPP; } - if 
(!info->linking) - break; - if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) { - NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); - return -EOPNOTSUPP; - } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4940,36 +5138,42 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, return 0; } -static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, - unsigned long event, void *ptr) +static int mlxsw_sp_netdevice_vlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev, + unsigned long event, void *ptr, + bool process_foreign) { struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); if (mlxsw_sp_port_dev_check(real_dev)) return mlxsw_sp_netdevice_port_vlan_event(vlan_dev, real_dev, - event, ptr, vid); + event, ptr, vid, + true); else if (netif_is_lag_master(real_dev)) return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev, real_dev, event, ptr, vid); else if (netif_is_bridge_master(real_dev)) - return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, real_dev, - event, ptr, vid); + return mlxsw_sp_netdevice_bridge_vlan_event(mlxsw_sp, vlan_dev, + real_dev, event, + ptr, vid, + process_foreign); return 0; } -static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, - unsigned long event, void *ptr) +static int mlxsw_sp_netdevice_bridge_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev, + unsigned long event, void *ptr, + bool process_foreign) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(br_dev); struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; struct net_device *upper_dev; u16 proto; - if (!mlxsw_sp) + if (!process_foreign && !mlxsw_sp_lower_get(br_dev)) return 0; extack = netdev_notifier_info_to_extack(&info->info); @@ -4997,11 +5201,6 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, NL_SET_ERR_MSG_MOD(extack, "VLAN uppers are only supported with 802.1q VLAN protocol"); return -EOPNOTSUPP; } - if (netif_is_macvlan(upper_dev) && - !mlxsw_sp_rif_exists(mlxsw_sp, br_dev)) { - NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); - return -EOPNOTSUPP; - } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -5107,35 +5306,48 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr) +static int __mlxsw_sp_netdevice_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, void *ptr, + bool process_foreign) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mlxsw_sp_span_entry *span_entry; - struct mlxsw_sp *mlxsw_sp; int err = 0; - mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); if (event == NETDEV_UNREGISTER) { span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev); if (span_entry) mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry); } - mlxsw_sp_span_respin(mlxsw_sp); if (netif_is_vxlan(dev)) err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr); else if (mlxsw_sp_port_dev_check(dev)) - err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr); + err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr, true); else if (netif_is_lag_master(dev)) err = mlxsw_sp_netdevice_lag_event(dev, event, ptr); else if (is_vlan_dev(dev)) - err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr); + err = mlxsw_sp_netdevice_vlan_event(mlxsw_sp, dev, event, 
ptr, + process_foreign); else if (netif_is_bridge_master(dev)) - err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr); + err = mlxsw_sp_netdevice_bridge_event(mlxsw_sp, dev, event, ptr, + process_foreign); else if (netif_is_macvlan(dev)) err = mlxsw_sp_netdevice_macvlan_event(dev, event, ptr); + return err; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlxsw_sp *mlxsw_sp; + int err; + + mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); + mlxsw_sp_span_respin(mlxsw_sp); + err = __mlxsw_sp_netdevice_event(mlxsw_sp, event, ptr, false); + return notifier_from_errno(err); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 231e364cbb7c..02ca2871b6f9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -69,6 +69,7 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS, MLXSW_SP_RESOURCE_RIF_MAC_PROFILES, MLXSW_SP_RESOURCE_RIFS, + MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS, }; struct mlxsw_sp_port; @@ -175,6 +176,7 @@ struct mlxsw_sp { struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct mlxsw_sp_policer_core *policer_core; + struct mlxsw_sp_port_range_core *pr_core; struct mlxsw_sp_kvdl *kvdl; struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; @@ -698,6 +700,8 @@ int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, struct mlxsw_sp_port_vlan * mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid); int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, @@ -716,8 +720,6 @@ int mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core, bool mlxsw_sp_port_dev_check(const struct net_device *dev); struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); -struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); -void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev); int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp); @@ -865,9 +867,13 @@ struct mlxsw_sp_acl_rule_info { egress_bind_blocker:1, counter_valid:1, policer_index_valid:1, - ipv6_valid:1; + ipv6_valid:1, + src_port_range_reg_valid:1, + dst_port_range_reg_valid:1; unsigned int counter_index; u16 policer_index; + u8 src_port_range_reg_index; + u8 dst_port_range_reg_index; struct { u32 prev_val; enum mlxsw_sp_acl_mangle_field prev_field; @@ -992,7 +998,8 @@ void mlxsw_sp_acl_ruleset_prio_get(struct mlxsw_sp_acl_ruleset *ruleset, struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl, struct mlxsw_afa_block *afa_block); -void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei); +void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei); void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, unsigned int priority); @@ -1043,6 +1050,9 @@ int 
mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u16 fid, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_ignore(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + bool disable_learning, bool disable_security); int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct mlxsw_sp_flow_block *block, @@ -1261,7 +1271,6 @@ int mlxsw_sp_setup_tc_block_qevent_mark(struct mlxsw_sp_port *mlxsw_sp_port, struct flow_block_offload *f); /* spectrum_fid.c */ -bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, u16 fid_index); int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex); @@ -1394,10 +1403,6 @@ void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); -/* spectrum_nve_vxlan.c */ -int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp); -void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp); - /* spectrum_trap.c */ int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp); @@ -1484,4 +1489,18 @@ int mlxsw_sp_pgt_entry_port_set(struct mlxsw_sp *mlxsw_sp, u16 mid, int mlxsw_sp_pgt_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_pgt_fini(struct mlxsw_sp *mlxsw_sp); +/* spectrum_port_range.c */ +struct mlxsw_sp_port_range { + u16 min; + u16 max; + u8 source:1; /* Source or destination */ +}; + +int mlxsw_sp_port_range_reg_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_port_range *range, + struct netlink_ext_ack *extack, + u8 *p_prr_index); +void mlxsw_sp_port_range_reg_put(struct mlxsw_sp *mlxsw_sp, u8 prr_index); +int mlxsw_sp_port_range_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_port_range_fini(struct mlxsw_sp *mlxsw_sp); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c index 3a636f753607..dfcdd37e797b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c @@ -90,7 +90,7 @@ mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, err_entry_add: err_rulei_commit: err_rulei_act_continue: - mlxsw_sp_acl_rulei_destroy(rulei); + mlxsw_sp_acl_rulei_destroy(mlxsw_sp, rulei); err_rulei_create: mlxsw_sp_acl_ctcam_chunk_fini(®ion->catchall.cchunk); return err; @@ -105,7 +105,7 @@ mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, ®ion->cregion, ®ion->catchall.cchunk, ®ion->catchall.centry); - mlxsw_sp_acl_rulei_destroy(rulei); + mlxsw_sp_acl_rulei_destroy(mlxsw_sp, rulei); mlxsw_sp_acl_ctcam_chunk_fini(®ion->catchall.cchunk); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c index e4f4cded2b6f..b1178b7a7f51 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c @@ -193,7 +193,7 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule, key->vrid, GENMASK(7, 0)); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, - key->vrid >> 8, GENMASK(2, 0)); + key->vrid >> 8, GENMASK(3, 0)); switch (key->proto) { 
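	/* Note: the virtual router ID spans two key elements -- 8 bits in
	 * VIRT_ROUTER_LSB plus up to 4 bits in VIRT_ROUTER_MSB (see the
	 * flex-key layout change below) -- so masking key->vrid >> 8 with
	 * GENMASK(2, 0) truncated the topmost bit; GENMASK(3, 0) covers
	 * the full MSB field.
	 */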
case MLXSW_SP_L3_PROTO_IPV4: return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 0423ac262d89..7c59c8a13584 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -339,10 +339,17 @@ err_afa_block_create: return ERR_PTR(err); } -void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei) +void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei) { if (rulei->action_created) mlxsw_afa_block_destroy(rulei->act_block); + if (rulei->src_port_range_reg_valid) + mlxsw_sp_port_range_reg_put(mlxsw_sp, + rulei->src_port_range_reg_index); + if (rulei->dst_port_range_reg_valid) + mlxsw_sp_port_range_reg_put(mlxsw_sp, + rulei->dst_port_range_reg_index); kfree(rulei); } @@ -768,6 +775,15 @@ int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, return mlxsw_afa_block_append_fid_set(rulei->act_block, fid, extack); } +int mlxsw_sp_acl_rulei_act_ignore(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + bool disable_learning, bool disable_security) +{ + return mlxsw_afa_block_append_ignore(rulei->act_block, + disable_learning, + disable_security); +} + int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct mlxsw_sp_flow_block *block, @@ -834,7 +850,7 @@ void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; - mlxsw_sp_acl_rulei_destroy(rule->rulei); + mlxsw_sp_acl_rulei_destroy(mlxsw_sp, rule->rulei); kfree(rule); mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 4dea39f2b304..cb746a43b24b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -31,12 +31,14 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = { MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_U32(L4_PORT_RANGE, 0x04, 16, 16), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4), + MLXSW_AFK_ELEMENT_INST_U32(L4_PORT_RANGE, 0x04, 16, 16), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; @@ -171,7 +173,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8), - MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3), + MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = { @@ -205,6 +207,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_0[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_2[] = { MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x04, 16, 9), /* TCP_CONTROL + TCP_ECN */ + MLXSW_AFK_ELEMENT_INST_U32(L4_PORT_RANGE, 0x04, 0, 16), }; static const struct mlxsw_afk_block mlxsw_sp2_afk_blocks[] 
= { @@ -321,7 +324,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8), - MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true), + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index b6ee2d658b0c..9df098474743 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -137,16 +137,6 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = { [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types, }; -bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index) -{ - enum mlxsw_sp_fid_type fid_type = MLXSW_SP_FID_TYPE_DUMMY; - struct mlxsw_sp_fid_family *fid_family; - - fid_family = mlxsw_sp->fid_core->fid_family_arr[fid_type]; - - return fid_family->start_index == fid_index; -} - struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, u16 fid_index) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 72917f09e806..9fd1ca079258 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -160,6 +160,16 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, */ rulei->egress_bind_blocker = 1; + /* Ignore learning and security lookup as redirection + * using ingress filters happens before the bridge. + */ + err = mlxsw_sp_acl_rulei_act_ignore(mlxsw_sp, rulei, + true, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append ignore action"); + return err; + } + fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp); fid_index = mlxsw_sp_fid_index(fid); err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei, @@ -418,6 +428,68 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, return 0; } +static int +mlxsw_sp_flower_parse_ports_range(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, u8 ip_proto) +{ + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_ports_range match; + u32 key_mask_value = 0; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE)) + return 0; + + if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Only UDP and TCP keys are supported"); + return -EINVAL; + } + + flow_rule_match_ports_range(rule, &match); + + if (match.mask->tp_min.src) { + struct mlxsw_sp_port_range range = { + .min = ntohs(match.key->tp_min.src), + .max = ntohs(match.key->tp_max.src), + .source = true, + }; + u8 prr_index; + int err; + + err = mlxsw_sp_port_range_reg_get(mlxsw_sp, &range, + f->common.extack, &prr_index); + if (err) + return err; + + rulei->src_port_range_reg_index = prr_index; + rulei->src_port_range_reg_valid = true; + key_mask_value |= BIT(prr_index); + } + + if (match.mask->tp_min.dst) { + struct mlxsw_sp_port_range range = { + .min = ntohs(match.key->tp_min.dst), + .max = ntohs(match.key->tp_max.dst), + }; + u8 prr_index; + int err; + + err = mlxsw_sp_port_range_reg_get(mlxsw_sp, &range, + f->common.extack, &prr_index); + if (err) + return err; + + rulei->dst_port_range_reg_index = prr_index; + 
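		/* Remember the register index so mlxsw_sp_acl_rulei_destroy()
		 * can release the reference, and accumulate BIT(prr_index)
		 * into the one-hot key/mask: the L4_PORT_RANGE key element
		 * carries one bit per port range register, set when the
		 * packet's L4 port falls inside that register's range.
		 */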
rulei->dst_port_range_reg_valid = true; + key_mask_value |= BIT(prr_index); + } + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_L4_PORT_RANGE, + key_mask_value, key_mask_value); + + return 0; +} + static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct flow_cls_offload *f, @@ -496,16 +568,17 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, int err; if (dissector->used_keys & - ~(BIT(FLOW_DISSECTOR_KEY_META) | - BIT(FLOW_DISSECTOR_KEY_CONTROL) | - BIT(FLOW_DISSECTOR_KEY_BASIC) | - BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS) | - BIT(FLOW_DISSECTOR_KEY_TCP) | - BIT(FLOW_DISSECTOR_KEY_IP) | - BIT(FLOW_DISSECTOR_KEY_VLAN))) { + ~(BIT_ULL(FLOW_DISSECTOR_KEY_META) | + BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | + BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_PORTS_RANGE) | + BIT_ULL(FLOW_DISSECTOR_KEY_TCP) | + BIT_ULL(FLOW_DISSECTOR_KEY_IP) | + BIT_ULL(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported key"); return -EOPNOTSUPP; @@ -604,6 +677,11 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_flower_parse_ports(mlxsw_sp, rulei, f, ip_proto); if (err) return err; + + err = mlxsw_sp_flower_parse_ports_range(mlxsw_sp, rulei, f, ip_proto); + if (err) + return err; + err = mlxsw_sp_flower_parse_tcp(mlxsw_sp, rulei, f, ip_proto); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index d2b57a045aa4..5479a1c19d2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -989,6 +989,9 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, int nve_ifindex; __be32 vni; + /* Necessary for __dev_get_by_index() below. */ + ASSERT_RTNL(); + mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid); mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index); mlxsw_sp_nve_ipv6_addr_flush_by_fid(mlxsw_sp, fid_index); @@ -997,15 +1000,13 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_vni(fid, &vni))) goto out; - nve_dev = dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); + nve_dev = __dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex); if (!nve_dev) goto out; mlxsw_sp_nve_fdb_clear_offload(mlxsw_sp, fid, nve_dev, vni); mlxsw_sp_fid_fdb_clear_offload(fid, nve_dev); - dev_put(nve_dev); - out: mlxsw_sp_fid_vni_clear(fid); mlxsw_sp_nve_tunnel_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_port_range.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_port_range.c new file mode 100644 index 000000000000..2d193de12be6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_port_range.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <linux/bits.h> +#include <linux/netlink.h> +#include <linux/refcount.h> +#include <linux/xarray.h> +#include <net/devlink.h> + +#include "spectrum.h" + +struct mlxsw_sp_port_range_reg { + struct mlxsw_sp_port_range range; + refcount_t refcount; + u32 index; +}; + +struct mlxsw_sp_port_range_core { + struct xarray prr_xa; + struct xa_limit prr_ids; + atomic_t prr_count; +}; + +static int +mlxsw_sp_port_range_reg_configure(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_port_range_reg *prr) +{ + char pprr_pl[MLXSW_REG_PPRR_LEN]; + + /* We do not care if packet is IPv4/IPv6 and TCP/UDP, so set all four + * fields. + */ + mlxsw_reg_pprr_pack(pprr_pl, prr->index); + mlxsw_reg_pprr_ipv4_set(pprr_pl, true); + mlxsw_reg_pprr_ipv6_set(pprr_pl, true); + mlxsw_reg_pprr_src_set(pprr_pl, prr->range.source); + mlxsw_reg_pprr_dst_set(pprr_pl, !prr->range.source); + mlxsw_reg_pprr_tcp_set(pprr_pl, true); + mlxsw_reg_pprr_udp_set(pprr_pl, true); + mlxsw_reg_pprr_port_range_min_set(pprr_pl, prr->range.min); + mlxsw_reg_pprr_port_range_max_set(pprr_pl, prr->range.max); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pprr), pprr_pl); +} + +static struct mlxsw_sp_port_range_reg * +mlxsw_sp_port_range_reg_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_port_range *range, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core; + struct mlxsw_sp_port_range_reg *prr; + int err; + + prr = kzalloc(sizeof(*prr), GFP_KERNEL); + if (!prr) + return ERR_PTR(-ENOMEM); + + prr->range = *range; + refcount_set(&prr->refcount, 1); + + err = xa_alloc(&pr_core->prr_xa, &prr->index, prr, pr_core->prr_ids, + GFP_KERNEL); + if (err) { + if (err == -EBUSY) + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of port range registers"); + goto err_xa_alloc; + } + + err = mlxsw_sp_port_range_reg_configure(mlxsw_sp, prr); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to configure port range register"); + goto err_reg_configure; + } + + atomic_inc(&pr_core->prr_count); + + return prr; + +err_reg_configure: + xa_erase(&pr_core->prr_xa, prr->index); +err_xa_alloc: + kfree(prr); + return ERR_PTR(err); +} + +static void mlxsw_sp_port_range_reg_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port_range_reg *prr) +{ + struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core; + + atomic_dec(&pr_core->prr_count); + xa_erase(&pr_core->prr_xa, prr->index); + kfree(prr); +} + +static struct mlxsw_sp_port_range_reg * +mlxsw_sp_port_range_reg_find(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_port_range *range) +{ + struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core; + struct mlxsw_sp_port_range_reg *prr; + unsigned long index; + + xa_for_each(&pr_core->prr_xa, index, prr) { + if (prr->range.min == range->min && + prr->range.max == range->max && + prr->range.source == range->source) + return prr; + } + + return NULL; +} + +int mlxsw_sp_port_range_reg_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_port_range *range, + struct netlink_ext_ack *extack, + u8 *p_prr_index) +{ + struct mlxsw_sp_port_range_reg *prr; + + prr = mlxsw_sp_port_range_reg_find(mlxsw_sp, range); + if (prr) { + refcount_inc(&prr->refcount); + *p_prr_index = prr->index; + return 0; + } + + prr = mlxsw_sp_port_range_reg_create(mlxsw_sp, range, extack); + if (IS_ERR(prr)) + return PTR_ERR(prr); + + *p_prr_index = prr->index; + + return 0; +} + +void mlxsw_sp_port_range_reg_put(struct mlxsw_sp *mlxsw_sp, u8 prr_index) +{ + struct mlxsw_sp_port_range_core *pr_core = 
mlxsw_sp->pr_core; + struct mlxsw_sp_port_range_reg *prr; + + prr = xa_load(&pr_core->prr_xa, prr_index); + if (WARN_ON(!prr)) + return; + + if (!refcount_dec_and_test(&prr->refcount)) + return; + + mlxsw_sp_port_range_reg_destroy(mlxsw_sp, prr); +} + +static u64 mlxsw_sp_port_range_reg_occ_get(void *priv) +{ + struct mlxsw_sp_port_range_core *pr_core = priv; + + return atomic_read(&pr_core->prr_count); +} + +int mlxsw_sp_port_range_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port_range_core *pr_core; + struct mlxsw_core *core = mlxsw_sp->core; + u64 max; + + if (!MLXSW_CORE_RES_VALID(core, ACL_MAX_L4_PORT_RANGE)) + return -EIO; + max = MLXSW_CORE_RES_GET(core, ACL_MAX_L4_PORT_RANGE); + + /* Each port range register is represented using a single bit in the + * two bytes "l4_port_range" ACL key element. + */ + WARN_ON(max > BITS_PER_BYTE * sizeof(u16)); + + pr_core = kzalloc(sizeof(*mlxsw_sp->pr_core), GFP_KERNEL); + if (!pr_core) + return -ENOMEM; + mlxsw_sp->pr_core = pr_core; + + pr_core->prr_ids.max = max - 1; + xa_init_flags(&pr_core->prr_xa, XA_FLAGS_ALLOC); + + devl_resource_occ_get_register(priv_to_devlink(core), + MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS, + mlxsw_sp_port_range_reg_occ_get, + pr_core); + + return 0; +} + +void mlxsw_sp_port_range_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port_range_core *pr_core = mlxsw_sp->pr_core; + + devl_resource_occ_get_unregister(priv_to_devlink(mlxsw_sp->core), + MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS); + WARN_ON(!xa_empty(&pr_core->prr_xa)); + xa_destroy(&pr_core->prr_xa); + kfree(pr_core); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index b32adf277a22..debd2c466f11 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -71,6 +71,7 @@ static const struct rhashtable_params mlxsw_sp_crif_ht_params = { struct mlxsw_sp_rif { struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ + netdevice_tracker dev_tracker; struct list_head neigh_list; struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; @@ -139,6 +140,7 @@ struct mlxsw_sp_rif_ops { struct netlink_ext_ack *extack); void (*deconfigure)(struct mlxsw_sp_rif *rif); struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack); void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac); }; @@ -2871,6 +2873,21 @@ static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev) return !!mlxsw_sp_port; } +static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router, + struct neighbour *n) +{ + struct net *net; + + net = neigh_parms_net(n->parms); + + /* Take a reference to ensure the neighbour won't be destructed until we + * drop the reference in delayed work. 
+ */ + neigh_clone(n); + return mlxsw_sp_router_schedule_work(net, router, n, + mlxsw_sp_router_neigh_event_work); +} + static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -2878,7 +2895,6 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, unsigned long interval; struct neigh_parms *p; struct neighbour *n; - struct net *net; router = container_of(nb, struct mlxsw_sp_router, netevent_nb); @@ -2902,7 +2918,6 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, break; case NETEVENT_NEIGH_UPDATE: n = ptr; - net = neigh_parms_net(n->parms); if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; @@ -2910,13 +2925,7 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, if (!mlxsw_sp_dev_lower_is_port(n->dev)) return NOTIFY_DONE; - /* Take a reference to ensure the neighbour won't be - * destructed until we drop the reference in delayed - * work. - */ - neigh_clone(n); - return mlxsw_sp_router_schedule_work(net, router, n, - mlxsw_sp_router_neigh_event_work); + return mlxsw_sp_router_schedule_neigh_work(router, n); case NETEVENT_IPV4_MPATH_HASH_UPDATE: case NETEVENT_IPV6_MPATH_HASH_UPDATE: @@ -2975,6 +2984,52 @@ static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } } +struct mlxsw_sp_neigh_rif_made_sync { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err; +}; + +static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data) +{ + struct mlxsw_sp_neigh_rif_made_sync *rms = data; + int rc; + + if (rms->err) + return; + if (n->dev != mlxsw_sp_rif_dev(rms->rif)) + return; + rc = mlxsw_sp_router_schedule_neigh_work(rms->mlxsw_sp->router, n); + if (rc != NOTIFY_DONE) + rms->err = -ENOMEM; +} + +static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_neigh_rif_made_sync rms = { + .mlxsw_sp = mlxsw_sp, + .rif = rif, + }; + + neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); + if (rms.err) + goto err_arp; + +#if IS_ENABLED(CONFIG_IPV6) + neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); +#endif + if (rms.err) + goto err_nd; + + return 0; + +err_nd: +err_arp: + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); + return rms.err; +} + enum mlxsw_sp_nexthop_type { MLXSW_SP_NEXTHOP_TYPE_ETH, MLXSW_SP_NEXTHOP_TYPE_IPIP, @@ -4396,6 +4451,19 @@ err_neigh_init: return err; } +static int mlxsw_sp_nexthop_type_rif_made(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + return mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + break; + } + + return 0; +} + static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { @@ -4524,6 +4592,35 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } +static int mlxsw_sp_nexthop_rif_made_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_nexthop *nh, *tmp; + unsigned int n = 0; + int err; + + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + err = mlxsw_sp_nexthop_type_rif_made(mlxsw_sp, nh); + if (err) + goto err_nexthop_type_rif; + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + n++; + } + + return 0; + +err_nexthop_type_rif: + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + if (!n--) + break; + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); + 
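		/* nh was torn down above; refresh its group so the adjacency
		 * entries stop referencing the removed nexthop. The unwind
		 * covers only the first n entries that were initialized
		 * before the failure, hence the countdown.
		 */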
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + } + return err; +} + static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { @@ -7451,6 +7548,7 @@ struct mlxsw_sp_fib6_event_work { struct mlxsw_sp_fib_event_work { struct work_struct work; + netdevice_tracker dev_tracker; union { struct mlxsw_sp_fib6_event_work fib6_work; struct fib_entry_notifier_info fen_info; @@ -7624,12 +7722,12 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) &fib_work->ven_info); if (err) dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n"); - dev_put(fib_work->ven_info.dev); + netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker); break; case FIB_EVENT_VIF_DEL: mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_work->ven_info); - dev_put(fib_work->ven_info.dev); + netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker); break; } mutex_unlock(&mlxsw_sp->router->lock); @@ -7700,7 +7798,8 @@ mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, case FIB_EVENT_VIF_ADD: case FIB_EVENT_VIF_DEL: memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); - dev_hold(fib_work->ven_info.dev); + netdev_hold(fib_work->ven_info.dev, &fib_work->dev_tracker, + GFP_ATOMIC); break; } } @@ -7884,6 +7983,26 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } +static int mlxsw_sp_router_rif_made_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + int err; + + err = mlxsw_sp_neigh_rif_made_sync(mlxsw_sp, rif); + if (err) + return err; + + err = mlxsw_sp_nexthop_rif_made_sync(mlxsw_sp, rif); + if (err) + goto err_nexthop; + + return 0; + +err_nexthop: + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); + return err; +} + static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { @@ -8190,6 +8309,7 @@ mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif, struct mlxsw_sp_router_hwstats_notify_work { struct work_struct work; struct net_device *dev; + netdevice_tracker dev_tracker; }; static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work) @@ -8201,7 +8321,7 @@ static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work) rtnl_lock(); rtnl_offload_xstats_notify(hws_work->dev); rtnl_unlock(); - dev_put(hws_work->dev); + netdev_put(hws_work->dev, &hws_work->dev_tracker); kfree(hws_work); } @@ -8221,7 +8341,7 @@ mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev) return; INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work); - dev_hold(dev); + netdev_hold(dev, &hws_work->dev_tracker, GFP_KERNEL); hws_work->dev = dev; mlxsw_core_schedule_work(&hws_work->work); } @@ -8293,14 +8413,14 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, err = -ENOMEM; goto err_rif_alloc; } - dev_hold(params->dev); + netdev_hold(params->dev, &rif->dev_tracker, GFP_KERNEL); mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; rif->rif_entries = rif_entries; if (ops->fid_get) { - fid = ops->fid_get(rif, extack); + fid = ops->fid_get(rif, params, extack); if (IS_ERR(fid)) { err = PTR_ERR(fid); goto err_fid_get; @@ -8321,6 +8441,10 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_mr_rif_add; } + err = mlxsw_sp_router_rif_made_sync(mlxsw_sp, rif); + if (err) + goto err_rif_made_sync; + if (netdev_offload_xstats_enabled(params->dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { err = mlxsw_sp_router_port_l3_stats_enable(rif); @@ -8335,6 
+8459,8 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, return rif; err_stats_enable: + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); +err_rif_made_sync: err_mr_rif_add: for (i--; i >= 0; i--) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); @@ -8344,7 +8470,7 @@ err_configure: mlxsw_sp_fid_put(fid); err_fid_get: mlxsw_sp->router->rifs[rif_index] = NULL; - dev_put(params->dev); + netdev_put(params->dev, &rif->dev_tracker); mlxsw_sp_rif_free(rif); err_rif_alloc: err_crif_lookup: @@ -8386,7 +8512,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) /* Loopback RIFs are not associated with a FID. */ mlxsw_sp_fid_put(fid); mlxsw_sp->router->rifs[rif->rif_index] = NULL; - dev_put(dev); + netdev_put(dev, &rif->dev_tracker); mlxsw_sp_rif_free(rif); mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; @@ -8410,6 +8536,110 @@ out: mutex_unlock(&mlxsw_sp->router->lock); } +static void mlxsw_sp_rif_destroy_vlan_upper(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev, + u16 vid) +{ + struct net_device *upper_dev; + struct mlxsw_sp_crif *crif; + + rcu_read_lock(); + upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), vid); + rcu_read_unlock(); + + if (!upper_dev) + return; + + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, upper_dev); + if (!crif || !crif->rif) + return; + + mlxsw_sp_rif_destroy(crif->rif); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + int lower_pvid, + unsigned long event, + struct netlink_ext_ack *extack); + +int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev, + u16 new_vid, bool is_pvid, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *old_rif; + struct mlxsw_sp_rif *new_rif; + struct net_device *upper_dev; + u16 old_pvid = 0; + u16 new_pvid; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + old_rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev); + if (old_rif) { + /* If the RIF on the bridge is not a VLAN RIF, we shouldn't have + * gotten a PVID notification. + */ + if (WARN_ON(old_rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)) + old_rif = NULL; + else + old_pvid = mlxsw_sp_fid_8021q_vid(old_rif->fid); + } + + if (is_pvid) + new_pvid = new_vid; + else if (old_pvid == new_vid) + new_pvid = 0; + else + goto out; + + if (old_pvid == new_pvid) + goto out; + + if (new_pvid) { + struct mlxsw_sp_rif_params params = { + .dev = br_dev, + .vid = new_pvid, + }; + + /* If there is a VLAN upper with the same VID as the new PVID, + * kill its RIF, if there is one. 
+ */ + mlxsw_sp_rif_destroy_vlan_upper(mlxsw_sp, br_dev, new_pvid); + + if (mlxsw_sp_dev_addr_list_empty(br_dev)) + goto out; + new_rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); + if (IS_ERR(new_rif)) { + err = PTR_ERR(new_rif); + goto out; + } + + if (old_pvid) + mlxsw_sp_rif_migrate_destroy(mlxsw_sp, old_rif, new_rif, + true); + } else { + mlxsw_sp_rif_destroy(old_rif); + } + + if (old_pvid) { + rcu_read_lock(); + upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), + old_pvid); + rcu_read_unlock(); + if (upper_dev) + err = mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, + upper_dev, + new_pvid, + NETDEV_UP, extack); + } + +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; +} + static void mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) @@ -8664,21 +8894,24 @@ __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_rif_params params = { - .dev = l3_dev, - }; + struct mlxsw_sp_rif_params params; u16 vid = mlxsw_sp_port_vlan->vid; struct mlxsw_sp_rif *rif; struct mlxsw_sp_fid *fid; int err; + params = (struct mlxsw_sp_rif_params) { + .dev = l3_dev, + .vid = vid, + }; + mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan); rif = mlxsw_sp_rif_subport_get(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); /* FID was already created, just take a reference */ - fid = rif->ops->fid_get(rif, extack); + fid = rif->ops->fid_get(rif, ¶ms, extack); err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid); if (err) goto err_fid_port_vid_map; @@ -8776,10 +9009,11 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { - if (netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev)) + if (!nomaster && (netif_is_any_bridge_port(port_dev) || + netif_is_lag_port(port_dev))) return 0; return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, @@ -8810,10 +9044,10 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { - if (netif_is_bridge_port(lag_dev)) + if (!nomaster && netif_is_bridge_port(lag_dev)) return 0; return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, @@ -8822,6 +9056,7 @@ static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, + int lower_pvid, unsigned long event, struct netlink_ext_ack *extack) { @@ -8829,6 +9064,7 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, .dev = l3_dev, }; struct mlxsw_sp_rif *rif; + int err; switch (event) { case NETDEV_UP: @@ -8840,7 +9076,21 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported"); return -EOPNOTSUPP; } + err = br_vlan_get_pvid(l3_dev, ¶ms.vid); + if (err) + return err; + if (!params.vid) + return 0; + } else if (is_vlan_dev(l3_dev)) { + params.vid = vlan_dev_vlan_id(l3_dev); + + /* If the VID matches PVID of the bridge below, the + * bridge owns the RIF for this VLAN. 
Don't do anything. + */ + if ((int)params.vid == lower_pvid) + return 0; } + rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); @@ -8856,24 +9106,32 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp, struct net_device *vlan_dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); + u16 lower_pvid; + int err; - if (netif_is_bridge_port(vlan_dev)) + if (!nomaster && netif_is_bridge_port(vlan_dev)) return 0; - if (mlxsw_sp_port_dev_check(real_dev)) + if (mlxsw_sp_port_dev_check(real_dev)) { return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev, event, vid, extack); - else if (netif_is_lag_master(real_dev)) + } else if (netif_is_lag_master(real_dev)) { return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, vid, extack); - else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev)) - return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event, + } else if (netif_is_bridge_master(real_dev) && + br_vlan_enabled(real_dev)) { + err = br_vlan_get_pvid(real_dev, &lower_pvid); + if (err) + return err; + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, + lower_pvid, event, extack); + } return 0; } @@ -8927,10 +9185,8 @@ static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp, int err; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev); - if (!rif) { - NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); - return -EOPNOTSUPP; - } + if (!rif) + return 0; err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); @@ -9000,19 +9256,21 @@ static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { if (mlxsw_sp_port_dev_check(dev)) - return mlxsw_sp_inetaddr_port_event(dev, event, extack); + return mlxsw_sp_inetaddr_port_event(dev, event, nomaster, + extack); else if (netif_is_lag_master(dev)) - return mlxsw_sp_inetaddr_lag_event(dev, event, extack); + return mlxsw_sp_inetaddr_lag_event(dev, event, nomaster, + extack); else if (netif_is_bridge_master(dev)) - return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event, + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, -1, event, extack); else if (is_vlan_dev(dev)) return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event, - extack); + nomaster, extack); else if (netif_is_macvlan(dev)) return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event, extack); @@ -9039,7 +9297,8 @@ static int mlxsw_sp_inetaddr_event(struct notifier_block *nb, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL); + err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, false, + NULL); out: mutex_unlock(&router->lock); return notifier_from_errno(err); @@ -9063,7 +9322,8 @@ static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack); + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, + ivi->extack); out: mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); @@ 
-9073,6 +9333,7 @@ struct mlxsw_sp_inet6addr_event_work { struct work_struct work; struct mlxsw_sp *mlxsw_sp; struct net_device *dev; + netdevice_tracker dev_tracker; unsigned long event; }; @@ -9092,11 +9353,11 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL); + __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, NULL); out: mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); - dev_put(dev); + netdev_put(dev, &inet6addr_work->dev_tracker); kfree(inet6addr_work); } @@ -9122,7 +9383,7 @@ static int mlxsw_sp_inet6addr_event(struct notifier_block *nb, inet6addr_work->mlxsw_sp = router->mlxsw_sp; inet6addr_work->dev = dev; inet6addr_work->event = event; - dev_hold(dev); + netdev_hold(dev, &inet6addr_work->dev_tracker, GFP_ATOMIC); mlxsw_core_schedule_work(&inet6addr_work->work); return NOTIFY_DONE; @@ -9146,7 +9407,8 @@ static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack); + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, + i6vi->extack); out: mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); @@ -9466,10 +9728,11 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, */ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (rif) - __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, extack); - return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack); + return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, false, + extack); } static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, @@ -9480,7 +9743,7 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (!rif) return; - __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL); + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, NULL); } static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) @@ -9523,6 +9786,116 @@ mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } +struct mlxsw_sp_router_replay_inetaddr_up { + struct mlxsw_sp *mlxsw_sp; + struct netlink_ext_ack *extack; + unsigned int done; + bool deslavement; +}; + +static int mlxsw_sp_router_replay_inetaddr_up(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data; + bool nomaster = ctx->deslavement; + struct mlxsw_sp_crif *crif; + int err; + + if (mlxsw_sp_dev_addr_list_empty(dev)) + return 0; + + crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev); + if (!crif || crif->rif) + return 0; + + if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP)) + return 0; + + err = __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_UP, + nomaster, ctx->extack); + if (err) + return err; + + ctx->done++; + return 0; +} + +static int mlxsw_sp_router_unreplay_inetaddr_up(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data; + bool nomaster = ctx->deslavement; + struct mlxsw_sp_crif *crif; + + if (!ctx->done) + return 0; + + if (mlxsw_sp_dev_addr_list_empty(dev)) + return 0; + + crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev); + if (!crif || !crif->rif) + return 0; + + /* We are rolling back 
NETDEV_UP, so ask for that. */ + if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP)) + return 0; + + __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_DOWN, nomaster, + NULL); + + ctx->done--; + return 0; +} + +int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp, + struct net_device *upper_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_router_replay_inetaddr_up ctx = { + .mlxsw_sp = mlxsw_sp, + .extack = extack, + .deslavement = false, + }; + struct netdev_nested_priv priv = { + .data = &ctx, + }; + int err; + + err = mlxsw_sp_router_replay_inetaddr_up(upper_dev, &priv); + if (err) + return err; + + err = netdev_walk_all_upper_dev_rcu(upper_dev, + mlxsw_sp_router_replay_inetaddr_up, + &priv); + if (err) + goto err_replay_up; + + return 0; + +err_replay_up: + netdev_walk_all_upper_dev_rcu(upper_dev, + mlxsw_sp_router_unreplay_inetaddr_up, + &priv); + mlxsw_sp_router_unreplay_inetaddr_up(upper_dev, &priv); + return err; +} + +void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct mlxsw_sp_router_replay_inetaddr_up ctx = { + .mlxsw_sp = mlxsw_sp, + .deslavement = true, + }; + struct netdev_nested_priv priv = { + .data = &ctx, + }; + + mlxsw_sp_router_replay_inetaddr_up(dev, &priv); +} + static int mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, struct net_device *dev, @@ -9539,15 +9912,84 @@ mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port, dev, extack); } +static void +mlxsw_sp_port_vid_router_leave(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + struct net_device *dev) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, + vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return; + + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); +} + static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev, struct netlink_ext_ack *extack) { u16 default_vid = MLXSW_SP_DEFAULT_VID; + struct net_device *upper_dev; + struct list_head *iter; + int done = 0; + u16 vid; + int err; - return mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, - default_vid, lag_dev, - extack); + err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, default_vid, + lag_dev, extack); + if (err) + return err; + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + + vid = vlan_dev_vlan_id(upper_dev); + err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, vid, + upper_dev, extack); + if (err) + goto err_router_join_dev; + + ++done; + } + + return 0; + +err_router_join_dev: + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + if (!done--) + break; + + vid = vlan_dev_vlan_id(upper_dev); + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev); + } + + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev); + return err; +} + +static void +__mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + u16 default_vid = MLXSW_SP_DEFAULT_VID; + struct net_device *upper_dev; + struct list_head *iter; + u16 vid; + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + + vid = vlan_dev_vlan_id(upper_dev); + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev); + } + + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev); } 
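/* The LAG join path above and mlxsw_sp_nexthop_rif_made_sync() earlier use
 * the same count-and-unwind idiom: count successful iterations and, on
 * failure, re-walk the same list, undoing exactly that many. The countdown
 * is needed because the upper-device and nexthop lists are walked by
 * iterator, not by index. A minimal self-contained sketch of the idiom
 * (hypothetical init/fini callbacks, not mlxsw code):
 */
#include <stddef.h>

struct item;

static int apply_all(struct item **items, size_t count,
		     int (*init)(struct item *),
		     void (*fini)(struct item *))
{
	size_t done = 0;
	size_t i;
	int err;

	for (i = 0; i < count; i++) {
		err = init(items[i]);
		if (err)
			goto err_init;
		done++;
	}

	return 0;

err_init:
	/* Undo only the entries that succeeded, mirroring the
	 * "if (!done--) break;" countdown used throughout this patch.
	 */
	for (i = 0; i < count; i++) {
		if (!done--)
			break;
		fini(items[i]);
	}
	return err;
}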
int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, @@ -9563,6 +10005,14 @@ int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, return err; } +void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock); + __mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev); + mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock); +} + static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -9608,6 +10058,40 @@ out: return notifier_from_errno(err); } +struct mlxsw_sp_macvlan_replay { + struct mlxsw_sp *mlxsw_sp; + struct netlink_ext_ack *extack; +}; + +static int mlxsw_sp_macvlan_replay_upper(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + const struct mlxsw_sp_macvlan_replay *rms = priv->data; + struct netlink_ext_ack *extack = rms->extack; + struct mlxsw_sp *mlxsw_sp = rms->mlxsw_sp; + + if (!netif_is_macvlan(dev)) + return 0; + + return mlxsw_sp_rif_macvlan_add(mlxsw_sp, dev, extack); +} + +static int mlxsw_sp_macvlan_replay(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_macvlan_replay rms = { + .mlxsw_sp = rif->mlxsw_sp, + .extack = extack, + }; + struct netdev_nested_priv priv = { + .data = &rms, + }; + + return netdev_walk_all_upper_dev_rcu(mlxsw_sp_rif_dev(rif), + mlxsw_sp_macvlan_replay_upper, + &priv); +} + static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, struct netdev_nested_priv *priv) { @@ -9630,7 +10114,6 @@ static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) if (!netif_is_macvlan_port(dev)) return 0; - netdev_warn(dev, "Router interface is deleted. Upper macvlans will not work\n"); return netdev_walk_all_upper_dev_rcu(dev, __mlxsw_sp_rif_macvlan_flush, &priv); } @@ -9688,6 +10171,10 @@ static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, if (err) goto err_rif_subport_op; + err = mlxsw_sp_macvlan_replay(rif, extack); + if (err) + goto err_macvlan_replay; + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) @@ -9703,6 +10190,8 @@ err_fid_rif_set: mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: + mlxsw_sp_rif_macvlan_flush(rif); +err_macvlan_replay: mlxsw_sp_rif_subport_op(rif, false); err_rif_subport_op: mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile); @@ -9724,6 +10213,7 @@ static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif) static struct mlxsw_sp_fid * mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index); @@ -9788,6 +10278,10 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, if (err) goto err_fid_bc_flood_set; + err = mlxsw_sp_macvlan_replay(rif, extack); + if (err) + goto err_macvlan_replay; + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) @@ -9803,6 +10297,8 @@ err_fid_rif_set: mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: + mlxsw_sp_rif_macvlan_flush(rif); +err_macvlan_replay: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: @@ -9836,6 +10332,7 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) static struct mlxsw_sp_fid * 
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif); @@ -9869,27 +10366,22 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { static struct mlxsw_sp_fid * mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { struct net_device *dev = mlxsw_sp_rif_dev(rif); struct net_device *br_dev; - u16 vid; - int err; + + if (WARN_ON(!params->vid)) + return ERR_PTR(-EINVAL); if (is_vlan_dev(dev)) { - vid = vlan_dev_vlan_id(dev); br_dev = vlan_dev_real_dev(dev); if (WARN_ON(!netif_is_bridge_master(br_dev))) return ERR_PTR(-EINVAL); - } else { - err = br_vlan_get_pvid(dev, &vid); - if (err < 0 || !vid) { - NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID"); - return ERR_PTR(-EINVAL); - } } - return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid); + return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, params->vid); } static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) @@ -9954,6 +10446,10 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, if (err) goto err_fid_bc_flood_set; + err = mlxsw_sp_macvlan_replay(rif, extack); + if (err) + goto err_macvlan_replay; + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) @@ -9969,6 +10465,8 @@ err_fid_rif_set: mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: + mlxsw_sp_rif_macvlan_flush(rif); +err_macvlan_replay: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 9a2669a08480..ed3b628caafe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -171,8 +171,19 @@ int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); struct net_device * mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev); +int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + u16 new_vid, bool is_pvid, + struct netlink_ext_ack *extack); int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev, struct netlink_ext_ack *extack); +void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev); +int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp, + struct net_device *upper_dev, + struct netlink_ext_ack *extack); +void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index 82e711afb02b..c59b5f11f357 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -93,13 +93,8 @@ void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp); struct mlxsw_sp_span_entry * mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, const struct net_device *to_dev); - void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_span_entry *span_entry); - -int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port 
*port, u16 mtu); -void mlxsw_sp_span_speed_update_work(struct work_struct *work); - int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp, int *p_span_id, const struct mlxsw_sp_span_agent_parms *parms); void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index d88e62bc759f..6c749c148148 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -384,6 +384,91 @@ mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, return __mlxsw_sp_bridge_port_find(bridge_device, brport_dev); } +static int mlxsw_sp_port_obj_add(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack); +static int mlxsw_sp_port_obj_del(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj); + +struct mlxsw_sp_bridge_port_replay_switchdev_objs { + struct net_device *brport_dev; + struct mlxsw_sp_port *mlxsw_sp_port; + int done; +}; + +static int +mlxsw_sp_bridge_port_replay_switchdev_objs(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_port_obj_info *port_obj_info = ptr; + struct netlink_ext_ack *extack = port_obj_info->info.extack; + struct mlxsw_sp_bridge_port_replay_switchdev_objs *rso; + int err = 0; + + rso = (void *)port_obj_info->info.ctx; + + if (event != SWITCHDEV_PORT_OBJ_ADD || + dev != rso->brport_dev) + goto out; + + /* When a port is joining the bridge through a LAG, there likely are + * VLANs configured on that LAG already. The replay will thus attempt to + * have the given port-vlans join the corresponding FIDs. But the LAG + * netdevice has already called the ndo_vlan_rx_add_vid NDO for its VLAN + * memberships, back before CHANGEUPPER was distributed and netdevice + * master set. So now before propagating the VLAN events further, we + * first need to kill the corresponding VID at the mlxsw_sp_port. + * + * Note that this doesn't need to be rolled back on failure -- if the + * replay fails, the enslavement is off, and the VIDs would be killed by + * LAG anyway as part of its rollback. 
+	 */
+	if (port_obj_info->obj->id == SWITCHDEV_OBJ_ID_PORT_VLAN) {
+		u16 vid = SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj)->vid;
+
+		err = mlxsw_sp_port_kill_vid(rso->mlxsw_sp_port->dev, 0, vid);
+		if (err)
+			goto out;
+	}
+
+	++rso->done;
+	err = mlxsw_sp_port_obj_add(rso->mlxsw_sp_port->dev, NULL,
+				    port_obj_info->obj, extack);
+
+out:
+	return notifier_from_errno(err);
+}
+
+static struct notifier_block mlxsw_sp_bridge_port_replay_switchdev_objs_nb = {
+	.notifier_call = mlxsw_sp_bridge_port_replay_switchdev_objs,
+};
+
+static int
+mlxsw_sp_bridge_port_unreplay_switchdev_objs(struct notifier_block *nb,
+					     unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	struct switchdev_notifier_port_obj_info *port_obj_info = ptr;
+	struct mlxsw_sp_bridge_port_replay_switchdev_objs *rso;
+
+	rso = (void *)port_obj_info->info.ctx;
+
+	if (event != SWITCHDEV_PORT_OBJ_ADD ||
+	    dev != rso->brport_dev)
+		return NOTIFY_DONE;
+	if (!rso->done--)
+		return NOTIFY_STOP;
+
+	mlxsw_sp_port_obj_del(rso->mlxsw_sp_port->dev, NULL,
+			      port_obj_info->obj);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block mlxsw_sp_bridge_port_unreplay_switchdev_objs_nb = {
+	.notifier_call = mlxsw_sp_bridge_port_unreplay_switchdev_objs,
+};
+
 static struct mlxsw_sp_bridge_port *
 mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
 			    struct net_device *brport_dev,
@@ -405,7 +490,7 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
 	bridge_port->system_port = mlxsw_sp_port->local_port;
 	bridge_port->dev = brport_dev;
 	bridge_port->bridge_device = bridge_device;
-	bridge_port->stp_state = BR_STATE_DISABLED;
+	bridge_port->stp_state = br_port_get_stp_state(brport_dev);
 	bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC |
 			     BR_MCAST_FLOOD;
 	INIT_LIST_HEAD(&bridge_port->vlans_list);
@@ -1479,29 +1564,15 @@ err_port_vlan_set:
 }
 
 static int
-mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp,
-				const struct net_device *br_dev,
-				const struct switchdev_obj_port_vlan *vlan)
+mlxsw_sp_br_rif_pvid_change(struct mlxsw_sp *mlxsw_sp,
+			    struct net_device *br_dev,
+			    const struct switchdev_obj_port_vlan *vlan,
+			    struct netlink_ext_ack *extack)
 {
-	u16 pvid;
-
-	pvid = mlxsw_sp_rif_vid(mlxsw_sp, br_dev);
-	if (!pvid)
-		return 0;
-
-	if (vlan->flags & BRIDGE_VLAN_INFO_PVID) {
-		if (vlan->vid != pvid) {
-			netdev_err(br_dev, "Can't change PVID, it's used by router interface\n");
-			return -EBUSY;
-		}
-	} else {
-		if (vlan->vid == pvid) {
-			netdev_err(br_dev, "Can't remove PVID, it's used by router interface\n");
-			return -EBUSY;
-		}
-	}
+	bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
 
-	return 0;
+	return mlxsw_sp_router_bridge_vlan_add(mlxsw_sp, br_dev, vlan->vid,
+					       flag_pvid, extack);
 }
 
 static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -1518,8 +1589,8 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
 		int err = 0;
 
 		if (br_vlan_enabled(orig_dev))
-			err = mlxsw_sp_br_ban_rif_pvid_change(mlxsw_sp,
-							      orig_dev, vlan);
+			err = mlxsw_sp_br_rif_pvid_change(mlxsw_sp, orig_dev,
+							  vlan, extack);
 		if (!err)
 			err = -EOPNOTSUPP;
 		return err;
@@ -2365,6 +2436,33 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int
+mlxsw_sp_bridge_port_replay(struct mlxsw_sp_bridge_port *bridge_port,
+			    struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_bridge_port_replay_switchdev_objs rso = {
+		.brport_dev = bridge_port->dev,
+		.mlxsw_sp_port = mlxsw_sp_port,
+	};
+	struct notifier_block *nb;
+	int err;
+
+	nb = &mlxsw_sp_bridge_port_replay_switchdev_objs_nb;
+	err = switchdev_bridge_port_replay(bridge_port->dev, mlxsw_sp_port->dev,
+					   &rso, NULL, nb, extack);
+	if (err)
+		goto err_replay;
+
+	return 0;
+
+err_replay:
+	nb = &mlxsw_sp_bridge_port_unreplay_switchdev_objs_nb;
+	switchdev_bridge_port_replay(bridge_port->dev, mlxsw_sp_port->dev,
+				     &rso, NULL, nb, extack);
+	return err;
+}
+
+static int
 mlxsw_sp_bridge_vlan_aware_port_join(struct mlxsw_sp_bridge_port *bridge_port,
 				     struct mlxsw_sp_port *mlxsw_sp_port,
 				     struct netlink_ext_ack *extack)
@@ -2378,7 +2476,7 @@ mlxsw_sp_bridge_vlan_aware_port_join(struct mlxsw_sp_bridge_port *bridge_port,
 	if (mlxsw_sp_port->default_vlan->fid)
 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan);
 
-	return 0;
+	return mlxsw_sp_bridge_port_replay(bridge_port, mlxsw_sp_port, extack);
 }
 
 static int
@@ -2550,6 +2648,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 	struct net_device *dev = bridge_port->dev;
 	u16 vid;
+	int err;
 
 	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
@@ -2565,8 +2664,20 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 	if (mlxsw_sp_port_vlan->fid)
 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
 
-	return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port,
-					      extack);
+	err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port,
+					     extack);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_bridge_port_replay(bridge_port, mlxsw_sp_port, extack);
+	if (err)
+		goto err_replay;
+
+	return 0;
+
+err_replay:
+	mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
+	return err;
 }
 
 static void
@@ -2783,8 +2894,15 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (err)
 		goto err_port_join;
 
+	err = mlxsw_sp_netdevice_enslavement_replay(mlxsw_sp, br_dev, extack);
+	if (err)
+		goto err_replay;
+
 	return 0;
 
+err_replay:
+	bridge_device->ops->port_leave(bridge_device, bridge_port,
+				       mlxsw_sp_port);
 err_port_join:
 	mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
 	return err;
@@ -2948,9 +3066,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
 		goto just_remove;
 	}
 
-	if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid))
-		goto just_remove;
-
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
 	if (!mlxsw_sp_port_vlan) {
 		netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
@@ -3018,9 +3133,6 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
 		goto just_remove;
 	}
 
-	if (mlxsw_sp_fid_is_dummy(mlxsw_sp, fid))
-		goto just_remove;
-
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
 	if (!mlxsw_sp_port_vlan) {
 		netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
@@ -3262,6 +3374,7 @@ out:
 
 struct mlxsw_sp_switchdev_event_work {
 	struct work_struct work;
+	netdevice_tracker dev_tracker;
 	union {
 		struct switchdev_notifier_fdb_info fdb_info;
 		struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
@@ -3418,8 +3531,8 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
 out:
 	rtnl_unlock();
 	kfree(switchdev_work->fdb_info.addr);
+	netdev_put(dev, &switchdev_work->dev_tracker);
 	kfree(switchdev_work);
-	dev_put(dev);
 }
 
 static void
@@ -3430,7 +3543,6 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
 	struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
 	struct mlxsw_sp_bridge_device *bridge_device;
 	struct net_device *dev = switchdev_work->dev;
-	u8 all_zeros_mac[ETH_ALEN] = { 0 };
 	enum mlxsw_sp_l3proto proto;
 	union mlxsw_sp_l3addr addr;
 	struct net_device *br_dev;
@@ -3452,7 +3564,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
 					      &proto, &addr);
 
-	if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+	if (is_zero_ether_addr(vxlan_fdb_info->eth_addr)) {
 		err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr);
 		if (err) {
 			mlxsw_sp_fid_put(fid);
@@ -3504,7 +3616,6 @@ mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_bridge_device *bridge_device;
 	struct net_device *dev = switchdev_work->dev;
 	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
-	u8 all_zeros_mac[ETH_ALEN] = { 0 };
 	enum mlxsw_sp_l3proto proto;
 	union mlxsw_sp_l3addr addr;
 	struct mlxsw_sp_fid *fid;
@@ -3525,7 +3636,7 @@ mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
 					      &proto, &addr);
 
-	if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+	if (is_zero_ether_addr(vxlan_fdb_info->eth_addr)) {
 		mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr);
 		mlxsw_sp_fid_put(fid);
 		return;
@@ -3574,8 +3685,8 @@ static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work)
 
 out:
 	rtnl_unlock();
+	netdev_put(dev, &switchdev_work->dev_tracker);
 	kfree(switchdev_work);
-	dev_put(dev);
 }
 
 static int
@@ -3675,7 +3786,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 		 * upper device containig mlxsw_sp_port or just a
 		 * mlxsw_sp_port
 		 */
-		dev_hold(dev);
+		netdev_hold(dev, &switchdev_work->dev_tracker, GFP_ATOMIC);
 		break;
 	case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
 	case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
@@ -3685,7 +3796,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 						    info);
 		if (err)
 			goto err_vxlan_work_prepare;
-		dev_hold(dev);
+		netdev_hold(dev, &switchdev_work->dev_tracker, GFP_ATOMIC);
 		break;
 	default:
 		kfree(switchdev_work);
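The mlxsw_sp_bridge_port_replay()/mlxsw_sp_bridge_port_unreplay_switchdev_objs() pair above uses simple count-based rollback bookkeeping: the replay notifier increments rso->done for each switchdev object it successfully programs, and on failure a second replay pass deletes objects until the counter runs out (returning NOTIFY_STOP), so exactly the objects that were added get removed. The following userspace sketch shows why that undoes precisely the successful additions; every name in it is hypothetical, and only the counting pattern mirrors the driver:

/* Minimal userspace sketch of the replay/unreplay bookkeeping: a counter
 * records how many objects were applied, so a failure can roll back exactly
 * that many in a second pass over the same object sequence. */
#include <stdio.h>

struct replay_state {
	int done;		/* number of objects successfully applied */
};

static int apply_obj(int obj, struct replay_state *rs)
{
	if (obj == 3)		/* pretend object 3 fails to program */
		return -1;
	printf("applied obj %d\n", obj);
	++rs->done;		/* mirrors "++rso->done" in the driver */
	return 0;
}

static int unapply_obj(int obj, struct replay_state *rs)
{
	if (!rs->done--)	/* mirrors "if (!rso->done--) return NOTIFY_STOP" */
		return 1;	/* stop: everything applied so far is undone */
	printf("rolled back obj %d\n", obj);
	return 0;
}

int main(void)
{
	struct replay_state rs = { 0 };
	int objs[] = { 0, 1, 2, 3, 4 };
	int i, err = 0;

	for (i = 0; i < 5; i++) {
		err = apply_obj(objs[i], &rs);
		if (err)
			break;
	}
	if (err)		/* second pass, this time deleting */
		for (i = 0; i < 5; i++)
			if (unapply_obj(objs[i], &rs))
				break;
	return 0;
}

Running it applies objects 0-2, fails on 3, and rolls back exactly objects 0-2 -- the same guarantee the unreplay notifier provides, since switchdev replays objects in a deterministic order on both passes.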
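The dev_hold()/dev_put() to netdev_hold()/netdev_put() conversions above switch the deferred-work reference on the netdevice to the tracked-reference API: a netdevice_tracker embedded in the work item is registered when the reference is taken and checked when it is released, so with CONFIG_NET_DEV_REFCNT_TRACKER enabled a leaked reference can be attributed to the code path that took it. A kernel-flavoured sketch of the pattern (my_work and friends are hypothetical stand-ins for mlxsw_sp_switchdev_event_work; not buildable outside a kernel tree):

/* Sketch: pair each netdev_hold() with a netdev_put() on the same tracker,
 * which lives next to the pointer it guards for the lifetime of the work. */
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_work {
	struct work_struct work;
	struct net_device *dev;
	netdevice_tracker dev_tracker;	/* ties this hold to its put */
};

static void my_work_fn(struct work_struct *work)
{
	struct my_work *w = container_of(work, struct my_work, work);

	/* ... use w->dev under the reference taken at enqueue time ... */
	netdev_put(w->dev, &w->dev_tracker);	/* release the tracked ref */
	kfree(w);
}

/* Called from atomic notifier context, hence GFP_ATOMIC throughout. */
static int my_enqueue(struct net_device *dev)
{
	struct my_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

	if (!w)
		return -ENOMEM;
	INIT_WORK(&w->work, my_work_fn);
	w->dev = dev;
	netdev_hold(dev, &w->dev_tracker, GFP_ATOMIC);
	schedule_work(&w->work);
	return 0;
}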
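Finally, replacing ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac) with is_zero_ether_addr(vxlan_fdb_info->eth_addr) drops the on-stack scratch array: the helper tests the six octets of the MAC directly. A minimal userspace approximation of the semantics (the real helper lives in <linux/etherdevice.h> and is optimized with wider loads):

#include <stdbool.h>
#include <stdio.h>

/* Equivalent in effect to the kernel's is_zero_ether_addr(): true iff all
 * six bytes of the address are zero. */
static bool is_zero_ether_addr_sketch(const unsigned char *addr)
{
	return (addr[0] | addr[1] | addr[2] |
		addr[3] | addr[4] | addr[5]) == 0;
}

int main(void)
{
	unsigned char a[6] = { 0 };
	unsigned char b[6] = { 0, 0, 0, 0, 0, 1 };

	printf("%d %d\n", is_zero_ether_addr_sketch(a),
	       is_zero_ether_addr_sketch(b));	/* prints "1 0" */
	return 0;
}

In the VXLAN FDB paths above, the all-zeros address marks the catch-all remote entry, which is why it is routed to the NVE flood-IP helpers rather than programmed as a unicast FDB entry.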