diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 593 |
1 files changed, 516 insertions, 77 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 7b0d3de0ec6c..55b38544422f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -35,11 +35,15 @@ #include <crypto/aead.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> +#include <net/netevent.h> #include "en.h" #include "ipsec.h" #include "ipsec_rxtx.h" +#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) +#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1 + static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) { return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; @@ -50,27 +54,71 @@ static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x) return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle; } +static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) +{ + struct mlx5e_ipsec_dwork *dwork = + container_of(_work, struct mlx5e_ipsec_dwork, dwork.work); + struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; + struct xfrm_state *x = sa_entry->x; + + spin_lock(&x->lock); + xfrm_state_check_expire(x); + if (x->km.state == XFRM_STATE_EXPIRED) { + sa_entry->attrs.drop = true; + mlx5e_accel_ipsec_fs_modify(sa_entry); + } + spin_unlock(&x->lock); + + if (sa_entry->attrs.drop) + return; + + queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, + MLX5_IPSEC_RESCHED); +} + static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) { - struct xfrm_replay_state_esn *replay_esn; + struct xfrm_state *x = sa_entry->x; u32 seq_bottom = 0; + u32 esn, esn_msb; u8 overlap; - if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { - sa_entry->esn_state.trigger = 0; + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_PACKET: + switch (x->xso.dir) { + case XFRM_DEV_OFFLOAD_IN: + esn = x->replay_esn->seq; + esn_msb = x->replay_esn->seq_hi; + break; + case XFRM_DEV_OFFLOAD_OUT: + esn = x->replay_esn->oseq; + esn_msb = x->replay_esn->oseq_hi; + break; + default: + WARN_ON(true); + return false; + } + break; + case XFRM_DEV_OFFLOAD_CRYPTO: + /* Already parsed by XFRM core */ + esn = x->replay_esn->seq; + break; + default: + WARN_ON(true); return false; } - replay_esn = sa_entry->x->replay_esn; - if (replay_esn->seq >= replay_esn->replay_window) - seq_bottom = replay_esn->seq - replay_esn->replay_window + 1; - overlap = sa_entry->esn_state.overlap; - sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, - htonl(seq_bottom)); + if (esn >= x->replay_esn->replay_window) + seq_bottom = esn - x->replay_esn->replay_window + 1; + + if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) + esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); + + sa_entry->esn_state.esn = esn; + sa_entry->esn_state.esn_msb = esn_msb; - sa_entry->esn_state.trigger = 1; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { sa_entry->esn_state.overlap = 0; return true; @@ -87,25 +135,161 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct xfrm_state *x = sa_entry->x; + s64 start_value, n; - attrs->hard_packet_limit = x->lft.hard_packet_limit; + attrs->lft.hard_packet_limit = x->lft.hard_packet_limit; + attrs->lft.soft_packet_limit = x->lft.soft_packet_limit; if (x->lft.soft_packet_limit == XFRM_INF) return; - /* Hardware decrements hard_packet_limit counter through - * the operation. While fires an event when soft_packet_limit - * is reached. It emans that we need substitute the numbers - * in order to properly count soft limit. + /* Compute hard limit initial value and number of rounds. + * + * The counting pattern of hardware counter goes: + * value -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * [..] + * 2^31 | (2^31-1) -> 0 * - * As an example: - * XFRM user sets soft limit is 2 and hard limit is 9 and - * expects to see soft event after 2 packets and hard event - * after 9 packets. In our case, the hard limit will be set - * to 9 and soft limit is comparator to 7 so user gets the - * soft event after 2 packeta + * The pattern is created by using an ASO operation to atomically set + * bit 31 after the down counter clears bit 31. This is effectively an + * atomic addition of 2**31 to the counter. + * + * We wish to configure the counter, within the above pattern, so that + * when it reaches 0, it has hit the hard limit. This is defined by this + * system of equations: + * + * hard_limit == start_value + n * 2^31 + * n >= 0 + * start_value < 2^32, start_value >= 0 + * + * These equations are not single-solution, there are often two choices: + * hard_limit == start_value + n * 2^31 + * hard_limit == (start_value+2^31) + (n-1) * 2^31 + * + * The algorithm selects the solution that keeps the counter value + * above 2^31 until the final iteration. + */ + + /* Start by estimating n and compute start_value */ + n = attrs->lft.hard_packet_limit / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* Choose the best of the two solutions: */ + if (n >= 1) + n -= 1; + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* The best solution means: when there are multiple iterations we must + * start above 2^31 and count down to 2**31 to get the interrupt. + */ + attrs->lft.hard_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_hard = (u64)n; + + /* Compute soft limit initial value and number of rounds. + * + * The soft_limit is achieved by adjusting the counter's + * interrupt_value. This is embedded in the counting pattern created by + * hard packet calculations above. + * + * We wish to compute the interrupt_value for the soft_limit. This is + * defined by this system of equations: + * + * soft_limit == start_value - soft_value + n * 2^31 + * n >= 0 + * soft_value < 2^32, soft_value >= 0 + * for n == 0 start_value > soft_value + * + * As with compute_hard_n_value() the equations are not single-solution. + * The algorithm selects the solution that has: + * 2^30 <= soft_limit < 2^31 + 2^30 + * for the interior iterations, which guarantees a large guard band + * around the counter hard limit and next interrupt. + */ + + /* Start by estimating n and compute soft_value */ + n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - + x->lft.soft_packet_limit; + + /* Compare against constraints and adjust n */ + if (n < 0) + n = 0; + else if (start_value >= BIT_ULL(32)) + n -= 1; + else if (start_value < 0) + n += 1; + + /* Choose the best of the two solutions: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30)) + n += 1; + + /* Note that the upper limit of soft_value happens naturally because we + * always select the lowest soft_value. */ - attrs->soft_packet_limit = - x->lft.hard_packet_limit - x->lft.soft_packet_limit; + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + + /* The best solution means: when there are multiple iterations we must + * not fall below 2^30 as that would get too close to the false + * hard_limit and when we reach an interior iteration for soft_limit it + * has to be far away from 2**32-1 which is the counter reset point + * after the +2^31 to accommodate latency. + */ + attrs->lft.soft_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_soft = (u64)n; +} + +static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct xfrm_state *x = sa_entry->x; + struct net_device *netdev; + struct neighbour *n; + u8 addr[ETH_ALEN]; + const void *pkey; + u8 *dst, *src; + + if (attrs->mode != XFRM_MODE_TUNNEL || + attrs->type != XFRM_DEV_OFFLOAD_PACKET) + return; + + netdev = x->xso.real_dev; + + mlx5_query_mac_address(mdev, addr); + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + src = attrs->dmac; + dst = attrs->smac; + pkey = &attrs->saddr.a4; + break; + case XFRM_DEV_OFFLOAD_OUT: + src = attrs->smac; + dst = attrs->dmac; + pkey = &attrs->daddr.a4; + break; + default: + return; + } + + ether_addr_copy(src, addr); + n = neigh_lookup(&arp_tbl, pkey, netdev); + if (!n) { + n = neigh_create(&arp_tbl, pkey, netdev); + if (IS_ERR(n)) + return; + neigh_event_send(n, NULL); + attrs->drop = true; + } else { + neigh_ha_snapshot(addr, n, netdev); + ether_addr_copy(dst, addr); + } + neigh_release(n); } void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, @@ -141,11 +325,11 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, aes_gcm->icv_len = x->aead->alg_icv_len; /* esn */ - if (sa_entry->esn_state.trigger) { - attrs->esn_trigger = true; - attrs->esn = sa_entry->esn_state.esn; - attrs->esn_overlap = sa_entry->esn_state.overlap; - attrs->replay_window = x->replay_esn->replay_window; + if (x->props.flags & XFRM_STATE_ESN) { + attrs->replay_esn.trigger = true; + attrs->replay_esn.esn = sa_entry->esn_state.esn; + attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; + attrs->replay_esn.overlap = sa_entry->esn_state.overlap; } attrs->dir = x->xso.dir; @@ -163,8 +347,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->upspec.sport = ntohs(x->sel.sport); attrs->upspec.sport_mask = ntohs(x->sel.sport_mask); attrs->upspec.proto = x->sel.proto; + attrs->mode = x->props.mode; mlx5e_ipsec_init_limits(sa_entry, attrs); + mlx5e_ipsec_init_macs(sa_entry, attrs); } static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, @@ -233,6 +419,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } + if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) { + NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); + return -EINVAL; + } + switch (x->xso.type) { case XFRM_DEV_OFFLOAD_CRYPTO: if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) { @@ -240,11 +431,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } - if (x->props.mode != XFRM_MODE_TRANSPORT && - x->props.mode != XFRM_MODE_TUNNEL) { - NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); - return -EINVAL; - } break; case XFRM_DEV_OFFLOAD_PACKET: if (!(mlx5_ipsec_device_caps(mdev) & @@ -253,8 +439,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } - if (x->props.mode != XFRM_MODE_TRANSPORT) { - NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode"); + if (x->props.mode == XFRM_MODE_TUNNEL && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode"); return -EINVAL; } @@ -283,6 +470,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one"); return -EINVAL; } + + if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) { + NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0"); + return -EINVAL; + } break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); @@ -291,14 +483,134 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return 0; } -static void _update_xfrm_state(struct work_struct *work) +static void mlx5e_ipsec_modify_state(struct work_struct *_work) +{ + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; + struct mlx5_accel_esp_xfrm_attrs *attrs; + + attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs; + + mlx5_accel_esp_modify_xfrm(sa_entry, attrs); +} + +static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + + if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO || + x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return; + + if (x->props.flags & XFRM_STATE_ESN) { + sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn; + return; + } + + sa_entry->set_iv_op = mlx5e_ipsec_set_iv; +} + +static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work) +{ + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; + struct mlx5e_ipsec_netevent_data *data = work->data; + struct mlx5_accel_esp_xfrm_attrs *attrs; + + attrs = &sa_entry->attrs; + + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + ether_addr_copy(attrs->smac, data->addr); + break; + case XFRM_DEV_OFFLOAD_OUT: + ether_addr_copy(attrs->dmac, data->addr); + break; + default: + WARN_ON_ONCE(true); + } + attrs->drop = false; + mlx5e_accel_ipsec_fs_modify(sa_entry); +} + +static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_work *work; + void *data = NULL; + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + break; + case XFRM_DEV_OFFLOAD_PACKET: + if (x->props.mode != XFRM_MODE_TUNNEL) + return 0; + break; + default: + break; + } + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + data = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!data) + goto free_work; + + INIT_WORK(&work->work, mlx5e_ipsec_modify_state); + break; + case XFRM_DEV_OFFLOAD_PACKET: + data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data), + GFP_KERNEL); + if (!data) + goto free_work; + + INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event); + break; + default: + break; + } + + work->data = data; + work->sa_entry = sa_entry; + sa_entry->work = work; + return 0; + +free_work: + kfree(work); + return -ENOMEM; +} + +static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) { - struct mlx5e_ipsec_modify_state_work *modify_work = - container_of(work, struct mlx5e_ipsec_modify_state_work, work); - struct mlx5e_ipsec_sa_entry *sa_entry = container_of( - modify_work, struct mlx5e_ipsec_sa_entry, modify_work); + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_dwork *dwork; + + if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) + return 0; + + if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return 0; + + if (x->lft.soft_packet_limit == XFRM_INF && + x->lft.hard_packet_limit == XFRM_INF) + return 0; - mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs); + dwork = kzalloc(sizeof(*dwork), GFP_KERNEL); + if (!dwork) + return -ENOMEM; + + dwork->sa_entry = sa_entry; + INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit); + sa_entry->dwork = dwork; + return 0; } static int mlx5e_xfrm_add_state(struct xfrm_state *x, @@ -308,6 +620,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, struct net_device *netdev = x->xso.real_dev; struct mlx5e_ipsec *ipsec; struct mlx5e_priv *priv; + gfp_t gfp; int err; priv = netdev_priv(netdev); @@ -315,30 +628,52 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return -EOPNOTSUPP; ipsec = priv->ipsec; - err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); - if (err) - return err; - - sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL; + sa_entry = kzalloc(sizeof(*sa_entry), gfp); if (!sa_entry) return -ENOMEM; sa_entry->x = x; sa_entry->ipsec = ipsec; + /* Check if this SA is originated from acquire flow temporary SA */ + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + goto out; + + err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); + if (err) + goto err_xfrm; /* check esn */ - mlx5e_ipsec_update_esn_state(sa_entry); + if (x->props.flags & XFRM_STATE_ESN) + mlx5e_ipsec_update_esn_state(sa_entry); mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); + + err = mlx5_ipsec_create_work(sa_entry); + if (err) + goto err_xfrm; + + err = mlx5e_ipsec_create_dwork(sa_entry); + if (err) + goto release_work; + /* create hw context */ err = mlx5_ipsec_create_sa_ctx(sa_entry); if (err) - goto err_xfrm; + goto release_dwork; err = mlx5e_accel_ipsec_fs_add_rule(sa_entry); if (err) goto err_hw_ctx; + if (x->props.mode == XFRM_MODE_TUNNEL && + x->xso.type == XFRM_DEV_OFFLOAD_PACKET && + !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings"); + err = -EINVAL; + goto err_add_rule; + } + /* We use *_bh() variant because xfrm_timer_handler(), which runs * in softirq context, can reach our state delete logic and we need * xa_erase_bh() there. @@ -348,11 +683,18 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, if (err) goto err_add_rule; - if (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) - sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ? - mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv; + mlx5e_ipsec_set_esn_ops(sa_entry); + + if (sa_entry->dwork) + queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork, + MLX5_IPSEC_RESCHED); - INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state); + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && + x->props.mode == XFRM_MODE_TUNNEL) + xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, + MLX5E_IPSEC_TUNNEL_SA); + +out: x->xso.offload_handle = (unsigned long)sa_entry; return 0; @@ -360,32 +702,101 @@ err_add_rule: mlx5e_accel_ipsec_fs_del_rule(sa_entry); err_hw_ctx: mlx5_ipsec_free_sa_ctx(sa_entry); +release_dwork: + kfree(sa_entry->dwork); +release_work: + if (sa_entry->work) + kfree(sa_entry->work->data); + kfree(sa_entry->work); err_xfrm: kfree(sa_entry); - NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); + NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state"); return err; } static void mlx5e_xfrm_del_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; struct mlx5e_ipsec *ipsec = sa_entry->ipsec; struct mlx5e_ipsec_sa_entry *old; + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + return; + old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); WARN_ON(old != sa_entry); + + if (attrs->mode == XFRM_MODE_TUNNEL && + attrs->type == XFRM_DEV_OFFLOAD_PACKET) + /* Make sure that no ARP requests are running in parallel */ + flush_workqueue(ipsec->wq); + } static void mlx5e_xfrm_free_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - cancel_work_sync(&sa_entry->modify_work.work); + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + goto sa_entry_free; + + if (sa_entry->work) + cancel_work_sync(&sa_entry->work->work); + + if (sa_entry->dwork) + cancel_delayed_work_sync(&sa_entry->dwork->dwork); + mlx5e_accel_ipsec_fs_del_rule(sa_entry); mlx5_ipsec_free_sa_ctx(sa_entry); + kfree(sa_entry->dwork); + if (sa_entry->work) + kfree(sa_entry->work->data); + kfree(sa_entry->work); +sa_entry_free: kfree(sa_entry); } +static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs; + struct mlx5e_ipsec_netevent_data *data; + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5e_ipsec *ipsec; + struct neighbour *n = ptr; + struct net_device *netdev; + struct xfrm_state *x; + unsigned long idx; + + if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) + return NOTIFY_DONE; + + ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb); + xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) { + attrs = &sa_entry->attrs; + + if (attrs->family == AF_INET) { + if (!neigh_key_eq32(n, &attrs->saddr.a4) && + !neigh_key_eq32(n, &attrs->daddr.a4)) + continue; + } else { + if (!neigh_key_eq128(n, &attrs->saddr.a4) && + !neigh_key_eq128(n, &attrs->daddr.a4)) + continue; + } + + x = sa_entry->x; + netdev = x->xso.real_dev; + data = sa_entry->work->data; + + neigh_ha_snapshot(data->addr, n, netdev); + queue_work(ipsec->wq, &sa_entry->work->work); + } + + return NOTIFY_DONE; +} + void mlx5e_ipsec_init(struct mlx5e_priv *priv) { struct mlx5e_ipsec *ipsec; @@ -402,8 +813,8 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC); ipsec->mdev = priv->mdev; - ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, - priv->netdev->name); + ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0, + priv->netdev->name); if (!ipsec->wq) goto err_wq; @@ -414,6 +825,13 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) goto err_aso; } + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) { + ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event; + ret = register_netevent_notifier(&ipsec->netevent_nb); + if (ret) + goto clear_aso; + } + ret = mlx5e_accel_ipsec_fs_init(ipsec); if (ret) goto err_fs_init; @@ -424,6 +842,9 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) return; err_fs_init: + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + unregister_netevent_notifier(&ipsec->netevent_nb); +clear_aso: if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) mlx5e_ipsec_aso_cleanup(ipsec); err_aso: @@ -442,6 +863,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) return; mlx5e_accel_ipsec_fs_cleanup(ipsec); + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + unregister_netevent_notifier(&ipsec->netevent_nb); if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) mlx5e_ipsec_aso_cleanup(ipsec); destroy_workqueue(ipsec->wq); @@ -467,41 +890,43 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - struct mlx5e_ipsec_modify_state_work *modify_work = - &sa_entry->modify_work; + struct mlx5e_ipsec_work *work = sa_entry->work; + struct mlx5e_ipsec_sa_entry *sa_entry_shadow; bool need_update; need_update = mlx5e_ipsec_update_esn_state(sa_entry); if (!need_update) return; - mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs); - queue_work(sa_entry->ipsec->wq, &modify_work->work); + sa_entry_shadow = work->data; + memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow)); + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs); + queue_work(sa_entry->ipsec->wq, &work->work); } static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - int err; + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + u64 packets, bytes, lastuse; - lockdep_assert_held(&x->lock); + lockdep_assert(lockdep_is_held(&x->lock) || + lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex)); - if (sa_entry->attrs.soft_packet_limit == XFRM_INF) - /* Limits are not configured, as soft limit - * must be lowever than hard limit. - */ + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) return; - err = mlx5e_ipsec_aso_query(sa_entry, NULL); - if (err) - return; - - mlx5e_ipsec_aso_update_curlft(sa_entry, &x->curlft.packets); + mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); + x->curlft.packets += packets; + x->curlft.bytes += bytes; } -static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x, +static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, + struct xfrm_policy *x, struct netlink_ext_ack *extack) { + struct xfrm_selector *sel = &x->selector; + if (x->type != XFRM_POLICY_TYPE_MAIN) { NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types"); return -EINVAL; @@ -519,8 +944,9 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x, return -EINVAL; } - if (!x->xfrm_vec[0].reqid) { - NL_SET_ERR_MSG_MOD(extack, "Cannot offload policy without reqid"); + if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP && + addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0"); return -EINVAL; } @@ -529,12 +955,24 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x, return -EINVAL; } - if (x->selector.proto != IPPROTO_IP && - (x->selector.proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { + if (sel->proto != IPPROTO_IP && + (sel->proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); return -EINVAL; } + if (x->priority) { + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority"); + return -EINVAL; + } + + if (x->priority == U32_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority"); + return -EINVAL; + } + } + return 0; } @@ -560,6 +998,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, attrs->upspec.sport = ntohs(sel->sport); attrs->upspec.sport_mask = ntohs(sel->sport_mask); attrs->upspec.proto = sel->proto; + attrs->prio = x->priority; } static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, @@ -576,7 +1015,7 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, return -EOPNOTSUPP; } - err = mlx5e_xfrm_validate_policy(x, extack); + err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack); if (err) return err; |