summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-04 02:27:18 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-04 02:27:18 +0300
commitcb8e59cc87201af93dfbb6c3dccc8fcad72a09c2 (patch)
treea334db9022f89654b777bbce8c4c6632e65b9031 /drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
parent2e63f6ce7ed2c4ff83ba30ad9ccad422289a6c63 (diff)
parent065fcfd49763ec71ae345bb5c5a74f961031e70e (diff)
downloadlinux-cb8e59cc87201af93dfbb6c3dccc8fcad72a09c2.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Allow setting bluetooth L2CAP modes via socket option, from Luiz Augusto von Dentz. 2) Add GSO partial support to igc, from Sasha Neftin. 3) Several cleanups and improvements to r8169 from Heiner Kallweit. 4) Add IF_OPER_TESTING link state and use it when ethtool triggers a device self-test. From Andrew Lunn. 5) Start moving away from custom driver versions, use the globally defined kernel version instead, from Leon Romanovsky. 6) Support GRO vis gro_cells in DSA layer, from Alexander Lobakin. 7) Allow hard IRQ deferral during NAPI, from Eric Dumazet. 8) Add sriov and vf support to hinic, from Luo bin. 9) Support Media Redundancy Protocol (MRP) in the bridging code, from Horatiu Vultur. 10) Support netmap in the nft_nat code, from Pablo Neira Ayuso. 11) Allow UDPv6 encapsulation of ESP in the ipsec code, from Sabrina Dubroca. Also add ipv6 support for espintcp. 12) Lots of ReST conversions of the networking documentation, from Mauro Carvalho Chehab. 13) Support configuration of ethtool rxnfc flows in bcmgenet driver, from Doug Berger. 14) Allow to dump cgroup id and filter by it in inet_diag code, from Dmitry Yakunin. 15) Add infrastructure to export netlink attribute policies to userspace, from Johannes Berg. 16) Several optimizations to sch_fq scheduler, from Eric Dumazet. 17) Fallback to the default qdisc if qdisc init fails because otherwise a packet scheduler init failure will make a device inoperative. From Jesper Dangaard Brouer. 18) Several RISCV bpf jit optimizations, from Luke Nelson. 19) Correct the return type of the ->ndo_start_xmit() method in several drivers, it's netdev_tx_t but many drivers were using 'int'. From Yunjian Wang. 20) Add an ethtool interface for PHY master/slave config, from Oleksij Rempel. 21) Add BPF iterators, from Yonghang Song. 22) Add cable test infrastructure, including ethool interfaces, from Andrew Lunn. Marvell PHY driver is the first to support this facility. 23) Remove zero-length arrays all over, from Gustavo A. R. Silva. 24) Calculate and maintain an explicit frame size in XDP, from Jesper Dangaard Brouer. 25) Add CAP_BPF, from Alexei Starovoitov. 26) Support terse dumps in the packet scheduler, from Vlad Buslov. 27) Support XDP_TX bulking in dpaa2 driver, from Ioana Ciornei. 28) Add devm_register_netdev(), from Bartosz Golaszewski. 29) Minimize qdisc resets, from Cong Wang. 30) Get rid of kernel_getsockopt and kernel_setsockopt in order to eliminate set_fs/get_fs calls. From Christoph Hellwig. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2517 commits) selftests: net: ip_defrag: ignore EPERM net_failover: fixed rollback in net_failover_open() Revert "tipc: Fix potential tipc_aead refcnt leak in tipc_crypto_rcv" Revert "tipc: Fix potential tipc_node refcnt leak in tipc_rcv" vmxnet3: allow rx flow hash ops only when rss is enabled hinic: add set_channels ethtool_ops support selftests/bpf: Add a default $(CXX) value tools/bpf: Don't use $(COMPILE.c) bpf, selftests: Use bpf_probe_read_kernel s390/bpf: Use bcr 0,%0 as tail call nop filler s390/bpf: Maintain 8-byte stack alignment selftests/bpf: Fix verifier test selftests/bpf: Fix sample_cnt shared between two threads bpf, selftests: Adapt cls_redirect to call csum_level helper bpf: Add csum_level helper for fixing up csum levels bpf: Fix up bpf_skb_adjust_room helper's skb csum setting sfc: add missing annotation for efx_ef10_try_update_nic_stats_vf() crypto/chtls: IPv6 support for inline TLS Crypto/chcr: Fixes a coccinile check error Crypto/chcr: Fixes compilations warnings ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c935
1 files changed, 42 insertions, 893 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 4a8e0dfdc5f2..006807e04eda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -35,7 +35,6 @@
#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
-#include <net/netevent.h>
#include <net/arp.h>
#include <net/devlink.h>
#include <net/ipv6_stubs.h>
@@ -45,9 +44,9 @@
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
-#include "en/tc_tun.h"
+#include "en/rep/tc.h"
+#include "en/rep/neigh.h"
#include "fs_core.h"
-#include "lib/port_tun.h"
#include "lib/mlx5.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
@@ -58,16 +57,6 @@
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
-struct mlx5e_rep_indr_block_priv {
- struct net_device *netdev;
- struct mlx5e_rep_priv *rpriv;
-
- struct list_head list;
-};
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev);
-
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
@@ -485,706 +474,6 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
mlx5e_sqs2vport_stop(esw, rep);
}
-static unsigned long mlx5e_rep_ipv6_interval(void)
-{
- if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
- return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
-
- return ~0UL;
-}
-
-static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
-{
- unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
- unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
- mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
-}
-
-void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-
- mlx5_fc_queue_stats_work(priv->mdev,
- &neigh_update->neigh_stats_work,
- neigh_update->min_interval);
-}
-
-static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
-{
- return refcount_inc_not_zero(&nhe->refcnt);
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
-
-static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
-{
- if (refcount_dec_and_test(&nhe->refcnt)) {
- mlx5e_rep_neigh_entry_remove(nhe);
- kfree_rcu(nhe, rcu);
- }
-}
-
-static struct mlx5e_neigh_hash_entry *
-mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
- struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_neigh_hash_entry *next = NULL;
-
- rcu_read_lock();
-
- for (next = nhe ?
- list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
- &nhe->neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list) :
- list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list);
- next;
- next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
- &next->neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list))
- if (mlx5e_rep_neigh_entry_hold(next))
- break;
-
- rcu_read_unlock();
-
- if (nhe)
- mlx5e_rep_neigh_entry_release(nhe);
-
- return next;
-}
-
-static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
-{
- struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
- neigh_update.neigh_stats_work.work);
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_neigh_hash_entry *nhe = NULL;
-
- rtnl_lock();
- if (!list_empty(&rpriv->neigh_update.neigh_list))
- mlx5e_rep_queue_neigh_stats_work(priv);
-
- while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
- mlx5e_tc_update_neigh_used_value(nhe);
-
- rtnl_unlock();
-}
-
-static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- bool neigh_connected,
- unsigned char ha[ETH_ALEN])
-{
- struct ethhdr *eth = (struct ethhdr *)e->encap_header;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- bool encap_connected;
- LIST_HEAD(flow_list);
-
- ASSERT_RTNL();
-
- /* wait for encap to be fully initialized */
- wait_for_completion(&e->res_ready);
-
- mutex_lock(&esw->offloads.encap_tbl_lock);
- encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
- if (e->compl_result < 0 || (encap_connected == neigh_connected &&
- ether_addr_equal(e->h_dest, ha)))
- goto unlock;
-
- mlx5e_take_all_encap_flows(e, &flow_list);
-
- if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
- (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
- mlx5e_tc_encap_flows_del(priv, e, &flow_list);
-
- if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
- ether_addr_copy(e->h_dest, ha);
- ether_addr_copy(eth->h_dest, ha);
- /* Update the encap source mac, in case that we delete
- * the flows when encap source mac changed.
- */
- ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
-
- mlx5e_tc_encap_flows_add(priv, e, &flow_list);
- }
-unlock:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- mlx5e_put_encap_flow_list(priv, &flow_list);
-}
-
-static void mlx5e_rep_neigh_update(struct work_struct *work)
-{
- struct mlx5e_neigh_hash_entry *nhe =
- container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
- struct neighbour *n = nhe->n;
- struct mlx5e_encap_entry *e;
- unsigned char ha[ETH_ALEN];
- struct mlx5e_priv *priv;
- bool neigh_connected;
- u8 nud_state, dead;
-
- rtnl_lock();
-
- /* If these parameters are changed after we release the lock,
- * we'll receive another event letting us know about it.
- * We use this lock to avoid inconsistency between the neigh validity
- * and it's hw address.
- */
- read_lock_bh(&n->lock);
- memcpy(ha, n->ha, ETH_ALEN);
- nud_state = n->nud_state;
- dead = n->dead;
- read_unlock_bh(&n->lock);
-
- neigh_connected = (nud_state & NUD_VALID) && !dead;
-
- trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
-
- list_for_each_entry(e, &nhe->encap_list, encap_list) {
- if (!mlx5e_encap_take(e))
- continue;
-
- priv = netdev_priv(e->out_dev);
- mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
- mlx5e_encap_put(priv, e);
- }
- mlx5e_rep_neigh_entry_release(nhe);
- rtnl_unlock();
- neigh_release(n);
-}
-
-static struct mlx5e_rep_indr_block_priv *
-mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- struct mlx5e_rep_indr_block_priv *cb_priv;
-
- /* All callback list access should be protected by RTNL. */
- ASSERT_RTNL();
-
- list_for_each_entry(cb_priv,
- &rpriv->uplink_priv.tc_indr_block_priv_list,
- list)
- if (cb_priv->netdev == netdev)
- return cb_priv;
-
- return NULL;
-}
-
-static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
- struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
-
- list_for_each_entry_safe(cb_priv, temp, head, list) {
- mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
- kfree(cb_priv);
- }
-}
-
-static int
-mlx5e_rep_indr_offload(struct net_device *netdev,
- struct flow_cls_offload *flower,
- struct mlx5e_rep_indr_block_priv *indr_priv,
- unsigned long flags)
-{
- struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
- int err = 0;
-
- switch (flower->command) {
- case FLOW_CLS_REPLACE:
- err = mlx5e_configure_flower(netdev, priv, flower, flags);
- break;
- case FLOW_CLS_DESTROY:
- err = mlx5e_delete_flower(netdev, priv, flower, flags);
- break;
- case FLOW_CLS_STATS:
- err = mlx5e_stats_flower(netdev, priv, flower, flags);
- break;
- default:
- err = -EOPNOTSUPP;
- }
-
- return err;
-}
-
-static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
- void *type_data, void *indr_priv)
-{
- unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
- struct mlx5e_rep_indr_block_priv *priv = indr_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
- flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
- void *type_data, void *indr_priv)
-{
- struct mlx5e_rep_indr_block_priv *priv = indr_priv;
- struct flow_cls_offload *f = type_data;
- struct flow_cls_offload tmp;
- struct mlx5e_priv *mpriv;
- struct mlx5_eswitch *esw;
- unsigned long flags;
- int err;
-
- mpriv = netdev_priv(priv->rpriv->netdev);
- esw = mpriv->mdev->priv.eswitch;
-
- flags = MLX5_TC_FLAG(EGRESS) |
- MLX5_TC_FLAG(ESW_OFFLOAD) |
- MLX5_TC_FLAG(FT_OFFLOAD);
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- memcpy(&tmp, f, sizeof(*f));
-
- /* Re-use tc offload path by moving the ft flow to the
- * reserved ft chain.
- *
- * FT offload can use prio range [0, INT_MAX], so we normalize
- * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
- * as with tc, where prio 0 isn't supported.
- *
- * We only support chain 0 of FT offload.
- */
- if (!mlx5_esw_chains_prios_supported(esw) ||
- tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) ||
- tmp.common.chain_index)
- return -EOPNOTSUPP;
-
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
- tmp.common.prio++;
- err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
- memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
- return err;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static void mlx5e_rep_indr_block_unbind(void *cb_priv)
-{
- struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
-
- list_del(&indr_priv->list);
- kfree(indr_priv);
-}
-
-static LIST_HEAD(mlx5e_block_cb_list);
-
-static int
-mlx5e_rep_indr_setup_block(struct net_device *netdev,
- struct mlx5e_rep_priv *rpriv,
- struct flow_block_offload *f,
- flow_setup_cb_t *setup_cb)
-{
- struct mlx5e_rep_indr_block_priv *indr_priv;
- struct flow_block_cb *block_cb;
-
- if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
- return -EOPNOTSUPP;
-
- f->unlocked_driver_cb = true;
- f->driver_block_list = &mlx5e_block_cb_list;
-
- switch (f->command) {
- case FLOW_BLOCK_BIND:
- indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
- if (indr_priv)
- return -EEXIST;
-
- indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
- if (!indr_priv)
- return -ENOMEM;
-
- indr_priv->netdev = netdev;
- indr_priv->rpriv = rpriv;
- list_add(&indr_priv->list,
- &rpriv->uplink_priv.tc_indr_block_priv_list);
-
- block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv,
- mlx5e_rep_indr_block_unbind);
- if (IS_ERR(block_cb)) {
- list_del(&indr_priv->list);
- kfree(indr_priv);
- return PTR_ERR(block_cb);
- }
- flow_block_cb_add(block_cb, f);
- list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
-
- return 0;
- case FLOW_BLOCK_UNBIND:
- indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
- if (!indr_priv)
- return -ENOENT;
-
- block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
- if (!block_cb)
- return -ENOENT;
-
- flow_block_cb_remove(block_cb, f);
- list_del(&block_cb->driver_list);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
- return 0;
-}
-
-static
-int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
- enum tc_setup_type type, void *type_data)
-{
- switch (type) {
- case TC_SETUP_BLOCK:
- return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
- mlx5e_rep_indr_setup_tc_cb);
- case TC_SETUP_FT:
- return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
- mlx5e_rep_indr_setup_ft_cb);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- int err;
-
- err = __flow_indr_block_cb_register(netdev, rpriv,
- mlx5e_rep_indr_setup_cb,
- rpriv);
- if (err) {
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
- mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
- netdev_name(netdev), err);
- }
- return err;
-}
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb,
- rpriv);
-}
-
-static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
- uplink_priv.netdevice_nb);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
- struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-
- if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
- !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
- return NOTIFY_OK;
-
- switch (event) {
- case NETDEV_REGISTER:
- mlx5e_rep_indr_register_block(rpriv, netdev);
- break;
- case NETDEV_UNREGISTER:
- mlx5e_rep_indr_unregister_block(rpriv, netdev);
- break;
- }
- return NOTIFY_OK;
-}
-
-static void
-mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
- struct mlx5e_neigh_hash_entry *nhe,
- struct neighbour *n)
-{
- /* Take a reference to ensure the neighbour and mlx5 encap
- * entry won't be destructed until we drop the reference in
- * delayed work.
- */
- neigh_hold(n);
-
- /* This assignment is valid as long as the the neigh reference
- * is taken
- */
- nhe->n = n;
-
- if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
- mlx5e_rep_neigh_entry_release(nhe);
- neigh_release(n);
- }
-}
-
-static struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
- struct mlx5e_neigh *m_neigh);
-
-static int mlx5e_rep_netevent_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
- neigh_update.netevent_nb);
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_neigh_hash_entry *nhe = NULL;
- struct mlx5e_neigh m_neigh = {};
- struct neigh_parms *p;
- struct neighbour *n;
- bool found = false;
-
- switch (event) {
- case NETEVENT_NEIGH_UPDATE:
- n = ptr;
-#if IS_ENABLED(CONFIG_IPV6)
- if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
-#else
- if (n->tbl != &arp_tbl)
-#endif
- return NOTIFY_DONE;
-
- m_neigh.dev = n->dev;
- m_neigh.family = n->ops->family;
- memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-
- rcu_read_lock();
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
- rcu_read_unlock();
- if (!nhe)
- return NOTIFY_DONE;
-
- mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
- break;
-
- case NETEVENT_DELAY_PROBE_TIME_UPDATE:
- p = ptr;
-
- /* We check the device is present since we don't care about
- * changes in the default table, we only care about changes
- * done per device delay prob time parameter.
- */
-#if IS_ENABLED(CONFIG_IPV6)
- if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
-#else
- if (!p->dev || p->tbl != &arp_tbl)
-#endif
- return NOTIFY_DONE;
-
- rcu_read_lock();
- list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
- neigh_list) {
- if (p->dev == nhe->m_neigh.dev) {
- found = true;
- break;
- }
- }
- rcu_read_unlock();
- if (!found)
- return NOTIFY_DONE;
-
- neigh_update->min_interval = min_t(unsigned long,
- NEIGH_VAR(p, DELAY_PROBE_TIME),
- neigh_update->min_interval);
- mlx5_fc_update_sampling_interval(priv->mdev,
- neigh_update->min_interval);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static const struct rhashtable_params mlx5e_neigh_ht_params = {
- .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
- .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
- .key_len = sizeof(struct mlx5e_neigh),
- .automatic_shrinking = true,
-};
-
-static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- int err;
-
- err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
- if (err)
- return err;
-
- INIT_LIST_HEAD(&neigh_update->neigh_list);
- mutex_init(&neigh_update->encap_lock);
- INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
- mlx5e_rep_neigh_stats_work);
- mlx5e_rep_neigh_update_init_interval(rpriv);
-
- rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
- err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
- if (err)
- goto out_err;
- return 0;
-
-out_err:
- rhashtable_destroy(&neigh_update->neigh_ht);
- return err;
-}
-
-static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
- unregister_netevent_notifier(&neigh_update->netevent_nb);
-
- flush_workqueue(priv->wq); /* flush neigh update works */
-
- cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
-
- mutex_destroy(&neigh_update->encap_lock);
- rhashtable_destroy(&neigh_update->neigh_ht);
-}
-
-static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
- struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- int err;
-
- err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
- &nhe->rhash_node,
- mlx5e_neigh_ht_params);
- if (err)
- return err;
-
- list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
-
- return err;
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
-
- mutex_lock(&rpriv->neigh_update.encap_lock);
-
- list_del_rcu(&nhe->neigh_list);
-
- rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
- &nhe->rhash_node,
- mlx5e_neigh_ht_params);
- mutex_unlock(&rpriv->neigh_update.encap_lock);
-}
-
-/* This function must only be called under the representor's encap_lock or
- * inside rcu read lock section.
- */
-static struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
- struct mlx5e_neigh *m_neigh)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct mlx5e_neigh_hash_entry *nhe;
-
- nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
- mlx5e_neigh_ht_params);
- return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
-}
-
-static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct mlx5e_neigh_hash_entry **nhe)
-{
- int err;
-
- *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
- if (!*nhe)
- return -ENOMEM;
-
- (*nhe)->priv = priv;
- memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
- INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
- spin_lock_init(&(*nhe)->encap_list_lock);
- INIT_LIST_HEAD(&(*nhe)->encap_list);
- refcount_set(&(*nhe)->refcnt, 1);
-
- err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
- if (err)
- goto out_free;
- return 0;
-
-out_free:
- kfree(*nhe);
- return err;
-}
-
-int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
- struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
- struct mlx5e_neigh_hash_entry *nhe;
- int err;
-
- err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
- if (err)
- return err;
-
- mutex_lock(&rpriv->neigh_update.encap_lock);
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
- if (!nhe) {
- err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
- if (err) {
- mutex_unlock(&rpriv->neigh_update.encap_lock);
- mlx5_tun_entropy_refcount_dec(tun_entropy,
- e->reformat_type);
- return err;
- }
- }
-
- e->nhe = nhe;
- spin_lock(&nhe->encap_list_lock);
- list_add_rcu(&e->encap_list, &nhe->encap_list);
- spin_unlock(&nhe->encap_list_lock);
-
- mutex_unlock(&rpriv->neigh_update.encap_lock);
-
- return 0;
-}
-
-void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
- struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
-
- if (!e->nhe)
- return;
-
- spin_lock(&e->nhe->encap_list_lock);
- list_del_rcu(&e->encap_list);
- spin_unlock(&e->nhe->encap_list_lock);
-
- mlx5e_rep_neigh_entry_release(e->nhe);
- e->nhe = NULL;
- mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
-}
-
static int mlx5e_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -1225,129 +514,6 @@ static int mlx5e_rep_close(struct net_device *dev)
return ret;
}
-static int
-mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
- struct flow_cls_offload *cls_flower, int flags)
-{
- switch (cls_flower->command) {
- case FLOW_CLS_REPLACE:
- return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_DESTROY:
- return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_STATS:
- return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
- flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static
-int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
- struct tc_cls_matchall_offload *ma)
-{
- switch (ma->command) {
- case TC_CLSMATCHALL_REPLACE:
- return mlx5e_tc_configure_matchall(priv, ma);
- case TC_CLSMATCHALL_DESTROY:
- return mlx5e_tc_delete_matchall(priv, ma);
- case TC_CLSMATCHALL_STATS:
- mlx5e_tc_stats_matchall(priv, ma);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
- struct mlx5e_priv *priv = cb_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
- case TC_SETUP_CLSMATCHALL:
- return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- struct flow_cls_offload tmp, *f = type_data;
- struct mlx5e_priv *priv = cb_priv;
- struct mlx5_eswitch *esw;
- unsigned long flags;
- int err;
-
- flags = MLX5_TC_FLAG(INGRESS) |
- MLX5_TC_FLAG(ESW_OFFLOAD) |
- MLX5_TC_FLAG(FT_OFFLOAD);
- esw = priv->mdev->priv.eswitch;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- memcpy(&tmp, f, sizeof(*f));
-
- if (!mlx5_esw_chains_prios_supported(esw))
- return -EOPNOTSUPP;
-
- /* Re-use tc offload path by moving the ft flow to the
- * reserved ft chain.
- *
- * FT offload can use prio range [0, INT_MAX], so we normalize
- * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
- * as with tc, where prio 0 isn't supported.
- *
- * We only support chain 0 of FT offload.
- */
- if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
- return -EOPNOTSUPP;
- if (tmp.common.chain_index != 0)
- return -EOPNOTSUPP;
-
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
- tmp.common.prio++;
- err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
- memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
- return err;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
-static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
-static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct flow_block_offload *f = type_data;
-
- f->unlocked_driver_cb = true;
-
- switch (type) {
- case TC_SETUP_BLOCK:
- return flow_block_cb_setup_simple(type_data,
- &mlx5e_rep_block_tc_cb_list,
- mlx5e_rep_setup_tc_cb,
- priv, priv, true);
- case TC_SETUP_FT:
- return flow_block_cb_setup_simple(type_data,
- &mlx5e_rep_block_ft_cb_list,
- mlx5e_rep_setup_ft_cb,
- priv, priv, true);
- default:
- return -EOPNOTSUPP;
- }
-}
-
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1540,10 +706,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
/* we want a persistent mac for the uplink rep */
mlx5_query_mac_address(mdev, netdev->dev_addr);
netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (MLX5_CAP_GEN(mdev, qos))
- netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
-#endif
+ mlx5e_dcbnl_build_rep_netdev(netdev);
} else {
netdev->netdev_ops = &mlx5e_netdev_ops_rep;
eth_hw_addr_random(netdev);
@@ -1691,6 +854,24 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
return 0;
}
+static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (!rpriv->vport_rx_rule)
+ return;
+
+ mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ rpriv->vport_rx_rule = NULL;
+}
+
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
+{
+ rep_vport_rx_rule_destroy(priv);
+
+ return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv);
+}
+
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1755,9 +936,7 @@ err_close_drop_rq:
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ rep_vport_rx_rule_destroy(priv);
mlx5e_destroy_rep_root_ft(priv);
mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
@@ -1790,31 +969,25 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
priv = netdev_priv(netdev);
uplink_priv = &rpriv->uplink_priv;
- mutex_init(&uplink_priv->unready_flows_lock);
- INIT_LIST_HEAD(&uplink_priv->unready_flows);
-
- /* init shared tc flow table */
- err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ err = mlx5e_rep_tc_init(rpriv);
if (err)
return err;
mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
- /* init indirect block notifications */
- INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
- uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
- err = register_netdevice_notifier_dev_net(rpriv->netdev,
- &uplink_priv->netdevice_nb,
- &uplink_priv->netdevice_nn);
+ mlx5e_rep_bond_init(rpriv);
+ err = mlx5e_rep_tc_netdevice_event_register(rpriv);
if (err) {
- mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
- goto tc_esw_cleanup;
+ mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
+ err);
+ goto err_event_reg;
}
return 0;
-tc_esw_cleanup:
- mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+err_event_reg:
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
return err;
}
@@ -1844,17 +1017,9 @@ destroy_tises:
static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
{
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-
- /* clean indirect TC block notifications */
- unregister_netdevice_notifier_dev_net(rpriv->netdev,
- &uplink_priv->netdevice_nb,
- &uplink_priv->netdevice_nn);
- mlx5e_rep_indr_clean_block_privs(rpriv);
-
- /* delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
- mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+ mlx5e_rep_tc_netdevice_event_unregister(rpriv);
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
}
static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
@@ -1896,13 +1061,8 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
return NOTIFY_OK;
}
- if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
-
- return NOTIFY_OK;
- }
+ if (event == MLX5_DEV_EVENT_PORT_AFFINITY)
+ return mlx5e_rep_tc_event_port_affinity(priv);
return NOTIFY_DONE;
}
@@ -1911,7 +1071,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
u16 max_mtu;
netdev->min_mtu = ETH_MIN_MTU;
@@ -1919,28 +1078,22 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
mlx5e_set_dev_port_mtu(priv);
- INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
- mlx5e_tc_reoffload_flows_work);
+ mlx5e_rep_tc_enable(priv);
mlx5_lag_add(mdev, netdev);
priv->events_nb.notifier_call = uplink_rep_async_event;
mlx5_notifier_register(mdev, &priv->events_nb);
-#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_initialize(priv);
mlx5e_dcbnl_init_app(priv);
-#endif
}
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
-#endif
mlx5_notifier_unregister(mdev, &priv->events_nb);
- cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+ mlx5e_rep_tc_disable(priv);
mlx5_lag_remove(mdev);
}
@@ -2047,26 +1200,22 @@ static int register_devlink_port(struct mlx5_core_dev *dev,
return 0;
mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
+ dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
pfnum = PCI_FUNC(dev->pdev->devfn);
- if (rep->vport == MLX5_VPORT_UPLINK) {
+ if (rep->vport == MLX5_VPORT_UPLINK)
devlink_port_attrs_set(&rpriv->dl_port,
DEVLINK_PORT_FLAVOUR_PHYSICAL,
pfnum, false, 0,
&ppid.id[0], ppid.id_len);
- dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
- } else if (rep->vport == MLX5_VPORT_PF) {
+ else if (rep->vport == MLX5_VPORT_PF)
devlink_port_attrs_pci_pf_set(&rpriv->dl_port,
&ppid.id[0], ppid.id_len,
pfnum);
- dl_port_index = rep->vport;
- } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch,
- rpriv->rep->vport)) {
+ else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport))
devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
&ppid.id[0], ppid.id_len,
pfnum, rep->vport - 1);
- dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
- }
return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
}